17 changes: 3 additions & 14 deletions onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc
@@ -13,14 +13,6 @@ using namespace CoreML::Specification;
namespace onnxruntime {
namespace coreml {

// Once all ops support FP16 we can remove this. Until then, we keep a set of ops to
// filter the supported ones.
static std::set<std::string> Float16Ops = {
"Add", "ArgMax", "AveragePool", "BatchNormalization", "Cast", "Clip", "Concat", "Conv", "ConvTranspose",
"DepthToSpace", "Div", "Gelu", "Gemm", "GlobalAveragePool", "GlobalMaxPool", "GridSample", "GroupNormalization",
"InstanceNormalization", "LayerNormalization", "LeakyRelu", "MatMul", "MaxPool", "Mul", "PRelu", "Pow",
"Reciprocal", "Relu", "Reshape", "Resize", "Sigmoid", "Slice", "Split", "Sqrt", "Sub", "Tanh", "Transpose"};

namespace {
// TODO, move this to shared_library
bool HasExternalInitializer(const InitializedTensorSet& initializers, const Node& node,
@@ -114,13 +106,10 @@ bool BaseOpBuilder::IsInputDtypeSupport(const Node& node, size_t idx,
return true;
}

// only support MLProgram for FP16
#if defined(COREML_ENABLE_MLPROGRAM)
if (input_params.create_mlprogram && input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16 &&
Float16Ops.count(node.OpType())) {
return true;
// only MLProgram supports FP16
if (!input_params.create_mlprogram && input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16) {
return false;
}
#endif

LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Input type: [" << input_type << "] is not currently supported";
return false;
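
The net effect of this hunk: the per-op Float16Ops allowlist is gone, FP16 inputs are accepted for any op when building an ML Program, and the NeuralNetwork path rejects FP16 outright. A minimal sketch of the resulting predicate (illustrative only, not the ORT helper itself; kFloat16 = 10 matches ONNX_NAMESPACE::TensorProto_DataType_FLOAT16):

#include <cstdint>

// Sketch of the simplified FP16 gating.
bool Fp16InputAllowed(bool create_mlprogram, int32_t input_type) {
  constexpr int32_t kFloat16 = 10;
  if (input_type != kFloat16) return true;  // non-FP16 types: unaffected by this check
  return create_mlprogram;                  // FP16 is only supported via ML Program
}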
onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc
@@ -6,6 +6,7 @@
#include "core/providers/coreml/builders/helper.h"
#include "core/providers/coreml/builders/impl/base_op_builder.h"
#include "core/providers/coreml/builders/impl/builder_utils.h"
#include "core/providers/coreml/shape_utils.h"
#include "core/providers/coreml/builders/model_builder.h"
#include "core/providers/coreml/builders/op_builder_factory.h"
#include "core/providers/shared/utils/utils.h"
@@ -55,6 +56,47 @@ bool CheckIfBothInputShapesMatch(const Node& node, const logging::Logger& logger
}
} // namespace

// Add variadic inputs to the model builder.
// In the ONNX spec some nodes allow variadic inputs, e.g. max(x, y, z, ...),
// while the CoreML maximum op only takes two inputs: maximum(x, y).
// The conversion therefore rewrites:
// max(x, y, z, ...) -> max(max(x, y), z, ...)
#if defined(COREML_ENABLE_MLPROGRAM)
static void AddVariadicInputs(std::unique_ptr<CoreML::Specification::MILSpec::Operation>* op,
ModelBuilder& model_builder,
const Node& node,
const logging::Logger& logger) {
using namespace CoreML::Specification::MILSpec;
const auto& input_defs(node.InputDefs());
std::string_view layer_input_name_x = model_builder.GetUniqueName(node, "variadic");
auto input_dtype = input_defs[0]->TypeAsProto()->tensor_type().elem_type();
const int32_t elem_type = static_cast<int32_t>(input_dtype);
std::vector<int64_t> x0_shape;
auto x0_dim_size = input_defs[0]->Shape()->dim_size();
auto x1_dim_size = input_defs[1]->Shape()->dim_size();
x0_dim_size = std::max(x0_dim_size, x1_dim_size);
// fill x0_shape with -1 to mark the dimensions as dynamic;
// CoreML treats a shape value of -1 as a dynamic dimension
x0_shape.resize(x0_dim_size, -1);
std::unique_ptr<Operation> op_prev = std::move(*op);
for (size_t i = 2; i < input_defs.size(); i++) {
AddIntermediateOperationOutput(*op_prev, layer_input_name_x, elem_type, x0_shape);
std::unique_ptr<Operation> op_cur = model_builder.CreateOperation(node, op_prev->type());
AddOperationInput(*op_cur, "x", layer_input_name_x);
AddOperationInput(*op_cur, "y", input_defs[i]->Name());
model_builder.AddOperation(std::move(op_prev));
op_prev = std::move(op_cur);
layer_input_name_x = model_builder.GetUniqueName(node, "variadic");
x1_dim_size = input_defs[i]->Shape()->dim_size();
if (x0_dim_size < x1_dim_size) {
x0_dim_size = x1_dim_size;
x0_shape.resize(x0_dim_size, -1);
}
}
*op = std::move(op_prev);
}
#endif
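
The chaining above can be pictured with a small standalone sketch (plain C++, illustrative names only, not ORT/CoreML APIs): each step folds one more input into an accumulated binary maximum.

#include <cassert>
#include <string>
#include <vector>

// Hypothetical helper: record one binary "maximum" op and return its output name.
static std::string EmitBinaryMax(const std::string& x, const std::string& y,
                                 std::vector<std::string>& program) {
  std::string out = "t" + std::to_string(program.size());
  program.push_back(out + " = maximum(" + x + ", " + y + ")");
  return out;
}

// Left-fold max(x0, x1, x2, ...) into a chain of binary maximum ops.
static std::string FoldVariadicMax(const std::vector<std::string>& inputs,
                                   std::vector<std::string>& program) {
  assert(inputs.size() >= 2);
  std::string acc = EmitBinaryMax(inputs[0], inputs[1], program);
  for (size_t i = 2; i < inputs.size(); ++i) {
    acc = EmitBinaryMax(acc, inputs[i], program);  // max(acc, next input)
  }
  return acc;
}

// FoldVariadicMax({"a", "b", "c", "d"}, program) records:
//   t0 = maximum(a, b); t1 = maximum(t0, c); t2 = maximum(t1, d)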

Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
const logging::Logger& logger) const {
const auto& op_type(node.OpType());
@@ -70,6 +112,8 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
coreml_op_type = "add";
} else if (op_type == "Mul") {
coreml_op_type = "mul";
} else if (op_type == "Max") {
coreml_op_type = "maximum";
} else if (op_type == "Sub") {
coreml_op_type = "sub";
} else if (op_type == "Div") {
@@ -86,8 +130,11 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
std::unique_ptr<Operation> op = model_builder.CreateOperation(node, coreml_op_type);
AddOperationInput(*op, "x", input_defs[0]->Name());
AddOperationInput(*op, "y", input_defs[1]->Name());
if (input_defs.size() > 2) {
// "max" node may have variadic inputs
AddVariadicInputs(&op, model_builder, node, logger);
}
AddOperationOutput(*op, *node.OutputDefs()[0]);

model_builder.AddOperation(std::move(op));
} else
#endif // defined (COREML_ENABLE_MLPROGRAM)
@@ -157,6 +204,10 @@ bool BinaryOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderIn
return false;
}

if (node.OpType() == "Max" && !input_params.create_mlprogram) {
return false;
}

return true;
}

onnxruntime/core/providers/coreml/builders/impl/reduction_op_builder.cc
@@ -5,6 +5,7 @@
#include "core/providers/common.h"
#include "core/providers/coreml/builders/helper.h"
#include "core/providers/coreml/builders/impl/base_op_builder.h"
#include "core/providers/coreml/builders/impl/builder_utils.h"
#include "core/providers/coreml/builders/model_builder.h"
#include "core/providers/coreml/builders/op_builder_factory.h"
#include "core/providers/shared/utils/utils.h"
@@ -20,6 +21,7 @@ class ReductionOpBuilder : public BaseOpBuilder {

bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
const logging::Logger& logger) const override;
bool SupportsMLProgram() const override { return true; }
};

namespace {
@@ -48,13 +50,12 @@ Status ReductionOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, co
const logging::Logger& /* logger */) const {
const auto& op_type(node.OpType());
const auto& input_defs(node.InputDefs());
const auto& initializers(model_builder.GetInitializerTensors());

std::vector<int64_t> axes;

NodeAttrHelper helper(node);
if (input_defs.size() > 1 && input_defs[1]->Exists()) {
auto& axes_tensor = *initializers.at(input_defs[1]->Name());
auto& axes_tensor = *model_builder.GetConstantInitializer(input_defs[1]->Name());
Initializer axes_initializer(axes_tensor);
int64_t* data = axes_initializer.data<int64_t>();
int64_t size = axes_initializer.size();
@@ -66,29 +67,67 @@ Status ReductionOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, co

const bool keepdims = helper.Get("keepdims", 1) != 0;
const bool noop_with_empty_axes = helper.Get("noop_with_empty_axes", 0) != 0;
#if defined(COREML_ENABLE_MLPROGRAM)
if (model_builder.CreateMLProgram()) {
using namespace CoreML::Specification::MILSpec;

std::string_view coreml_op_type;
if (noop_with_empty_axes && axes.size() == 0) {
coreml_op_type = "identity";
} else if (op_type == "ReduceSum") {
coreml_op_type = "reduce_sum";
} else if (op_type == "ReduceMean") {
coreml_op_type = "reduce_mean";
} else if (op_type == "ReduceMax") {
coreml_op_type = "reduce_max";
} else if (op_type == "ReduceMin") {
coreml_op_type = "reduce_min";
} else if (op_type == "ReduceProd") {
coreml_op_type = "reduce_prod";
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"ReductionOpBuilder::AddToModelBuilderImpl, unexpected op: ", op_type);
}
std::unique_ptr<Operation> op = model_builder.CreateOperation(node, coreml_op_type);
AddOperationInput(*op, "x", input_defs[0]->Name());
if (coreml_op_type != "identity") {
if (axes.size() > 0) {
AddOperationInput(*op, "axes", model_builder.AddConstant(op->type(), "axes", axes));
}
AddOperationInput(*op, "keep_dims", model_builder.AddScalarConstant(op->type(), "keep_dims", keepdims));
}
AddOperationOutput(*op, *node.OutputDefs()[0]);

model_builder.AddOperation(std::move(op));
} else
#endif // (COREML_ENABLE_MLPROGRAM)
{
std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);

if (op_type == "ReduceSum") {
AddReductionParams(layer->mutable_reducesum(), axes, keepdims, noop_with_empty_axes);
} else if (op_type == "ReduceMean") {
AddReductionParams(layer->mutable_reducemean(), axes, keepdims, noop_with_empty_axes);
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"ReductionOpBuilder::AddToModelBuilderImpl, unknown op: ", op_type);
}

*layer->mutable_input()->Add() = node.InputDefs()[0]->Name();
*layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();

model_builder.AddLayer(std::move(layer));
}
return Status::OK();
}
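
As a quick sanity check of the attribute handling above, a minimal numeric sketch (plain C++, not MIL; assumes standard ONNX ReduceSum semantics) of what the reductions compute:

#include <cstdio>
#include <vector>

int main() {
  // A 2x3 tensor reduced with ReduceSum.
  std::vector<std::vector<float>> x = {{1, 2, 3}, {4, 5, 6}};

  // axes=[1], keepdims=1 -> output shape [2, 1]: per-row sums.
  for (const auto& row : x) {
    float s = 0;
    for (float v : row) s += v;
    std::printf("%g ", s);  // prints: 6 15
  }

  // axes=[] with noop_with_empty_axes=0 -> reduce over all dims: a scalar.
  float total = 0;
  for (const auto& row : x)
    for (float v : row) total += v;
  std::printf("\n%g\n", total);  // prints: 21

  // axes=[] with noop_with_empty_axes=1 -> pass-through (the "identity" case above).
  return 0;
}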

bool ReductionOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
const logging::Logger& logger) const {
const auto& input_defs = node.InputDefs();

if (!input_params.create_mlprogram &&
(node.OpType() == "ReduceMax" || node.OpType() == "ReduceMin" || node.OpType() == "ReduceProd")) {
return false;
}
NodeAttrHelper helper(node);

// noop_with_empty_axes defaults to false and is only available in newer opsets where 'axes' is an optional input
@@ -99,18 +138,16 @@ bool ReductionOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInpu
if (input_defs.size() > 1 && input_defs[1]->Exists()) {
// 'axes' is optional input in new opsets
const auto& axes_name = input_defs[1]->Name();
const auto& initializers = input_params.graph_viewer.GetAllInitializedTensors();
if (!Contains(initializers, axes_name)) {
const auto* axes = input_params.graph_viewer.GetConstantInitializer(axes_name);
if (!axes) {
LOGS(logger, VERBOSE) << "Axes of reduction must be a constant initializer";
return false;
}

empty_axes = initializers.at(axes_name)->int64_data_size() == 0;
empty_axes = axes->int64_data_size() == 0;
}

if (empty_axes && noop_with_empty_axes) {
// TODO: When we add ML Program support we should enable this as it makes the node an Identity op
LOGS(logger, VERBOSE) << "CoreML doesn't support noop on empty axes for reduction layers" << std::endl;
if (empty_axes && noop_with_empty_axes && !input_params.create_mlprogram) {
LOGS(logger, VERBOSE) << "NeuralNetwork doesn't support noop on empty axes for reduction layers";
return false;
}

114 changes: 104 additions & 10 deletions onnxruntime/core/providers/coreml/builders/impl/shape_op_builder.cc
@@ -2,7 +2,9 @@
// Licensed under the MIT License.

#include "core/providers/coreml/builders/impl/base_op_builder.h"
#include "core/providers/coreml/builders/impl/builder_utils.h"
#include "core/providers/coreml/builders/model_builder.h"
#include "core/providers/coreml/shape_utils.h"
#include "core/providers/coreml/builders/op_builder_factory.h"
#include "core/providers/shared/utils/utils.h" // for NodeAttrHelper

@@ -14,28 +16,120 @@ class ShapeOpBuilder : public BaseOpBuilder {

bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
const logging::Logger& logger) const override;
bool HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params,
const logging::Logger& logger) const override;
bool SupportsMLProgram() const override { return true; }
};

Status ShapeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
const logging::Logger& /*logger*/) const {
auto layer = model_builder.CreateNNLayer(node);
layer->mutable_getshape();
*layer->mutable_input()->Add() = node.InputDefs()[0]->Name();
*layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();
model_builder.AddLayer(std::move(layer));
const auto& input_defs = node.InputDefs();

#if defined(COREML_ENABLE_MLPROGRAM)
if (model_builder.CreateMLProgram()) {
using namespace CoreML::Specification::MILSpec;
NodeAttrHelper node_attr_helper{node};
int64_t num_dims = input_defs[0]->Shape()->dim_size();
int64_t start = HandleNegativeAxis(node_attr_helper.Get("start", 0), num_dims);

int64_t size = -1;
if (node_attr_helper.HasAttr("end")) {
int64_t end = HandleNegativeAxis(node_attr_helper.Get("end", -1), num_dims);
size = end - start;
}

int32_t output_datatype = ONNX_NAMESPACE::TensorProto_DataType_INT32;
std::unique_ptr<Operation> op = model_builder.CreateOperation(node, "shape");
AddOperationInput(*op, "x", input_defs[0]->Name());
if (size != -1 || start != 0) {
std::string_view layer_input_name_x = model_builder.GetUniqueName(node, "slice_by_size");
std::vector<int64_t> x0_shape{num_dims};
AddIntermediateOperationOutput(*op, layer_input_name_x, output_datatype, x0_shape);
model_builder.AddOperation(std::move(op));

auto slice_op = model_builder.CreateOperation(node, "slice_by_size");
AddOperationInput(*slice_op, "x", layer_input_name_x);
std::vector<int64_t> starts = {start};
std::vector<int64_t> sizes = {size};
AddOperationInput(*slice_op, "begin", model_builder.AddConstant(slice_op->type(), "begin", starts));
AddOperationInput(*slice_op, "size", model_builder.AddConstant(slice_op->type(), "size", sizes));
AddOperationOutput(*slice_op, *node.OutputDefs()[0], output_datatype);
model_builder.AddOperation(std::move(slice_op));
} else {
AddOperationOutput(*op, *node.OutputDefs()[0], output_datatype);
model_builder.AddOperation(std::move(op));
}
} else // NOLINT
#endif
{
auto layer = model_builder.CreateNNLayer(node);
layer->mutable_getshape();
*layer->mutable_input()->Add() = input_defs[0]->Name();
*layer->mutable_output()->Add() = node.OutputDefs()[0]->Name();
model_builder.AddLayer(std::move(layer));
}
return Status::OK();
}
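
A worked sketch of the start/end handling above (plain C++; HandleNegAxis mirrors the HandleNegativeAxis helper, and size = -1 relies on slice_by_size treating -1 as "all remaining elements"):

#include <cstdint>

int64_t HandleNegAxis(int64_t axis, int64_t rank) {
  return axis < 0 ? axis + rank : axis;
}

struct SliceArgs {
  int64_t begin;
  int64_t size;  // -1 means "to the end"
};

SliceArgs ShapeSliceArgs(int64_t rank, int64_t start, bool has_end, int64_t end) {
  const int64_t begin = HandleNegAxis(start, rank);
  const int64_t size = has_end ? HandleNegAxis(end, rank) - begin : -1;
  return {begin, size};
}

// rank=4, start=1, end=3 -> begin=[1], size=[2]: Shape emits [d1, d2].
// rank=4, start=-1       -> begin=[3], size=-1 : Shape emits [d3].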

bool ShapeOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /*input_params*/,
bool ShapeOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params,
const logging::Logger& logger) const {
const auto* tensor_shape = node.InputDefs()[0]->Shape();

NodeAttrHelper node_attr_helper{node};
if (!input_params.create_mlprogram) {
if (node_attr_helper.HasAttr("end")) {
LOGS(logger, VERBOSE) << "Shape does not support 'end' attribute";
return false;
}

if (node_attr_helper.Get("start", 0) != 0) {
LOGS(logger, VERBOSE) << "Shape does not support 'start' attribute with value other than 0";
return false;
}
} else {
if (tensor_shape == nullptr) {
LOGS(logger, VERBOSE) << "Shape does not support slicing when the input shape is not available";
return false;
}
const int64_t rank = tensor_shape->dim_size();
int64_t size = node_attr_helper.HasAttr("end")
? HandleNegativeAxis(node_attr_helper.Get("end", 0), rank)
: rank;
int64_t start = HandleNegativeAxis(node_attr_helper.Get("start", 0), rank);
size -= start;
if (size == 0) {
LOGS(logger, VERBOSE) << "Shape does not support slicing when size is 0";
return false;
}
}

return true;
}

bool ShapeOpBuilder::HasSupportedInputsImpl(const Node& node,
[[maybe_unused]] const OpBuilderInputParams& input_params,
const logging::Logger& logger) const {
// We only check the type of input 0
const auto& input = *node.InputDefs()[0];

int32_t input_type;
if (!GetType(input, input_type, logger)) {
return false;
}

if (input_params.create_mlprogram) {
if ((input_type == ONNX_NAMESPACE::TensorProto_DataType_INT32 ||
input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT ||
input_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16)) {
return true;
} else {
LOGS(logger, VERBOSE) << "[" << node.OpType()
<< "] Input type: [" << input_type
<< "] is not supported.";
return false;
}
} else if (input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
LOGS(logger, VERBOSE) << "[" << node.OpType()
<< "] Input type: [" << input_type
<< "] is not supported.";
return false;
}
