48 changes: 30 additions & 18 deletions onnxruntime/core/providers/cpu/quantization/qlinearconv.cc
@@ -61,29 +61,17 @@ class QLinearConv : public OpKernel {
   static void ComputeOffset(OpKernelContext* context,
                             int64_t M,
                             ActType& X_zero_point_value,
-                            ActType& Y_zero_point_value,
-                            uint8_t& W_zero_point_value) {
+                            ActType& Y_zero_point_value) {
     const Tensor* X_zero_point = context->Input<Tensor>(InputTensors::IN_X_ZERO_POINT);
-    const Tensor* W_zero_point = context->Input<Tensor>(InputTensors::IN_W_ZERO_POINT);
     const Tensor* Y_zero_point = context->Input<Tensor>(InputTensors::IN_Y_ZERO_POINT);
     ORT_ENFORCE(IsScalarOr1ElementVector(X_zero_point),
                 "QLinearConv : input zero point must be a scalar or 1D tensor of size 1");
     ORT_ENFORCE(IsScalarOr1ElementVector(Y_zero_point),
                 "QLinearConv : result zero point must be a scalar or 1D tensor of size 1");
-    ORT_ENFORCE(IsValidQuantParam(W_zero_point, M),
-                "QLinearConv : filter zero point shape invalid");
 
     X_zero_point_value = *(X_zero_point->Data<ActType>());
     Y_zero_point_value = *(Y_zero_point->Data<ActType>());
 
-    const int64_t W_zero_point_size = W_zero_point->Shape().Size();
-    const auto* W_zero_point_data = static_cast<const uint8_t*>(W_zero_point->DataRaw());
-    W_zero_point_value = W_zero_point_data[0];
-    for (int64_t i = 1; i < W_zero_point_size; i++) {
-      ORT_ENFORCE(W_zero_point_data[i] == W_zero_point_value,
-                  "QLinearConv : zero point of per-channel filter must be same. "
-                  "This happens by design if the quantization is symmetric.");
-    }
+    ORT_UNUSED_PARAMETER(M);
   }
 
   static std::vector<float> ComputeOutputScale(OpKernelContext* context,
@@ -531,10 +519,27 @@ Status QLinearConv<ActType>::Compute(OpKernelContext* context) const {
 
   ActType X_zero_point_value;
   ActType Y_zero_point_value;
-  uint8_t W_zero_point_value;
-  ComputeOffset(context, M, X_zero_point_value, Y_zero_point_value, W_zero_point_value);
+  ComputeOffset(context, M, X_zero_point_value, Y_zero_point_value);
   std::vector<float> output_scales = ComputeOutputScale(context, M);
 
+  // Read weight zero points (may be scalar or per-channel).
+  const Tensor* W_zero_point = context->Input<Tensor>(InputTensors::IN_W_ZERO_POINT);
+  ORT_ENFORCE(IsValidQuantParam(W_zero_point, M), "QLinearConv : filter zero point shape invalid");
+  const int64_t W_zero_point_size = W_zero_point->Shape().Size();
+  const auto* W_zero_point_data = static_cast<const uint8_t*>(W_zero_point->DataRaw());
+  // Per-channel zero points are uniform when size == 1 or all values match.
+  const bool W_zero_point_is_uniform =
+      (W_zero_point_size <= 1) ||
+      std::all_of(W_zero_point_data + 1, W_zero_point_data + W_zero_point_size,
+                  [W_zero_point_data](uint8_t v) { return v == W_zero_point_data[0]; });
+  // When non-uniform, w_zero_point must be a full per-channel tensor of size M
+  // so that group_id * group_output_channels indexing is in bounds.
+  ORT_ENFORCE(W_zero_point_is_uniform || W_zero_point_size == M,
+              "QLinearConv : non-uniform weight zero point tensor size (", W_zero_point_size,
+              ") must equal number of output channels (", M, ")");
+  // Single representative value used for paths that require a scalar zero point.
+  const uint8_t W_zero_point_value = W_zero_point_data[0];
 
   const Tensor* B = context->Input<Tensor>(InputTensors::IN_BIAS);
 
   ORT_RETURN_IF_ERROR(conv_attrs_.ValidateInputShape(X->Shape(), W_shape, channels_last_));
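
A minimal standalone sketch of the uniformity check added above, with the tensor buffer stood in by a `std::vector` (names here are illustrative, not ORT API): `std::all_of` compares every element after the first against element zero, so a scalar or size-1 tensor is trivially uniform.

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Illustrative stand-in for W_zero_point_data / W_zero_point_size above.
static bool IsUniform(const std::vector<uint8_t>& zp) {
  // Size 0/1 is trivially uniform; otherwise every element must equal zp[0].
  return zp.size() <= 1 ||
         std::all_of(zp.begin() + 1, zp.end(),
                     [&zp](uint8_t v) { return v == zp[0]; });
}

int main() {
  std::cout << IsUniform({128, 128, 128}) << "\n";  // 1: scalar fast paths remain eligible
  std::cout << IsUniform({100, 105, 110}) << "\n";  // 0: forces the per-column GEMM path
  return 0;
}
```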
@@ -610,7 +615,11 @@ Status QLinearConv<ActType>::Compute(OpKernelContext* context) const {
   int64_t group_output_channels = M / group_count;
 
   // Test for depthwise convolution.
-  const bool is_depthwise_conv = ((is_symmetric_conv_ || reordered_W != nullptr) && group_input_channels == 1 && group_output_channels == 1);
+  // Depthwise path requires a single (uniform) filter zero point because
+  // MlasConvDepthwise accepts only a scalar FilterZeroPoint.
+  const bool is_depthwise_conv = (W_zero_point_is_uniform &&
+                                  (is_symmetric_conv_ || reordered_W != nullptr) &&
+                                  group_input_channels == 1 && group_output_channels == 1);
   if (is_depthwise_conv) {
     // Update the input and output channels to the number of groups in order to
     // reuse as much of the below standard convolution path.
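
To make the new gating concrete, a toy sketch of the decision with local stand-ins for the kernel's state (not the ORT interface): with `groups == channels` each group sees one input and one output channel, and only a uniform zero-point tensor keeps the fast path on.

```cpp
#include <cstdint>
#include <iostream>

// Toy stand-in for the depthwise gating above; not the ORT kernel interface.
static bool IsDepthwiseEligible(bool zp_uniform, bool weights_packed,
                                int64_t group_input_channels,
                                int64_t group_output_channels) {
  return zp_uniform && weights_packed &&
         group_input_channels == 1 && group_output_channels == 1;
}

int main() {
  std::cout << IsDepthwiseEligible(true, true, 1, 1) << "\n";   // 1: depthwise fast path
  std::cout << IsDepthwiseEligible(false, true, 1, 1) << "\n";  // 0: group-GEMM fallback
  return 0;
}
```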
@@ -974,7 +983,10 @@ Status QLinearConv<ActType>::Compute(OpKernelContext* context) const {
           gemm_params.B = reordered_W + group_id * group_output_channels,
           gemm_params.ldb = static_cast<size_t>(M);
         }
-        gemm_params.ZeroPointB = &W_zero_point_value;
+        gemm_params.ZeroPointB = !W_zero_point_is_uniform
+                                     ? W_zero_point_data + group_id * group_output_channels
+                                     : &W_zero_point_value;
+        gemm_params.PerColumnZeroPoints = !W_zero_point_is_uniform;
         gemm_params.C = worker_gemm_output + group_id * group_output_channels;
         gemm_params.ldc = static_cast<size_t>(M);
 
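
For reference, `PerColumnZeroPoints` selects whether one zero point, or one per column of B, is subtracted inside the quantized GEMM; with per-channel weights each output channel is a column of B, which is why the pointer above advances by `group_id * group_output_channels`. A naive sketch of the arithmetic under those assumptions (a reference loop, not MLAS's implementation or signature):

```cpp
#include <cstddef>
#include <cstdint>

// Naive reference for a quantized GEMM with optional per-column B zero points:
//   C[m][n] = sum_k (A[m][k] - zp_a) * (B[k][n] - zp_b[n or 0])
// Shapes and names are illustrative; zp_b must hold N values when
// per_column_zp_b is true, and at least one value otherwise.
void QGemmRef(const uint8_t* A, const uint8_t* B, int32_t* C,
              size_t M, size_t N, size_t K,
              uint8_t zp_a, const uint8_t* zp_b, bool per_column_zp_b) {
  for (size_t m = 0; m < M; ++m) {
    for (size_t n = 0; n < N; ++n) {
      // Per-column mode reads a distinct zero point for each output column.
      const int32_t zb = per_column_zp_b ? zp_b[n] : zp_b[0];
      int32_t acc = 0;
      for (size_t k = 0; k < K; ++k) {
        acc += (static_cast<int32_t>(A[m * K + k]) - zp_a) *
               (static_cast<int32_t>(B[k * N + n]) - zb);
      }
      C[m * N + n] = acc;
    }
  }
}
```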
160 changes: 158 additions & 2 deletions onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc
@@ -276,6 +276,7 @@ class QLinearConvOpTester {
     std::vector<int64_t> shape_;
     std::vector<float> scale_;
     T zero_point_{0};
+    std::vector<T> zero_points_;  // per-channel zero points (empty = use zero_point_)
   };
 
   std::default_random_engine generator_{1234};
@@ -406,7 +407,6 @@ class QLinearConvOpTester {
     const int64_t kernel_size = std::accumulate(
         kernel_shape, kernel_shape + kernel_rank, 1LL, std::multiplies<int64_t>());
     const int32_t X_zero_point = X_.zero_point_;
-    const int32_t W_zero_point = W_.zero_point_;
 
     const ActType* Xdata = X_.data_.data();
     ActType* Ydata = Y_data.data();
@@ -423,6 +423,10 @@ class QLinearConvOpTester {
         int32_t bias = B_.empty() ? 0 : B_[channel_index];
         float weight_scale = W_.scale_[(W_.scale_.size() == 1) ? 0 : channel_index];
         float requantize_scale = (X_.scale_[0] * weight_scale) / output_scale_;
+        // Use per-channel zero point if available, otherwise use the single zero_point_.
+        const int32_t W_zero_point = W_.zero_points_.empty()
+                                         ? static_cast<int32_t>(W_.zero_point_)
+                                         : static_cast<int32_t>(W_.zero_points_[channel_index]);
 
         std::vector<int64_t> d_output(kernel_rank, 0);
         std::vector<int64_t> d_kernel(kernel_rank, 0);
@@ -476,7 +480,12 @@ class QLinearConvOpTester {
     const std::vector<int64_t> W_scale_shape{static_cast<int64_t>(W_.scale_.size())};
     test.AddInput<FilterType>("w", W_.shape_, W_.data_, all_input_initializer_except_x);
     test.AddInput<float>("w_scale", W_scale_shape, W_.scale_, all_input_initializer_except_x);
-    test.AddInput<FilterType>("w_zero_point", {}, {W_.zero_point_}, all_input_initializer_except_x);
+    if (!W_.zero_points_.empty()) {
+      const std::vector<int64_t> W_zp_shape{static_cast<int64_t>(W_.zero_points_.size())};
+      test.AddInput<FilterType>("w_zero_point", W_zp_shape, W_.zero_points_, all_input_initializer_except_x);
+    } else {
+      test.AddInput<FilterType>("w_zero_point", {}, {W_.zero_point_}, all_input_initializer_except_x);
+    }
 
     test.AddInput<float>("y_scale", {}, {output_scale_}, all_input_initializer_except_x);
     test.AddInput<ActType>("y_zero_point", {}, {output_zero_point_}, all_input_initializer_except_x);
@@ -543,6 +552,10 @@ class QLinearConvOpTester {
     W_.scale_ = scales;
   }
 
+  void SetWeightZeroPoints(const std::vector<FilterType>& zero_points) {
+    W_.zero_points_ = zero_points;
+  }
+
   void GenerateRandomBias() {
     ORT_ENFORCE(W_.shape_.size() >= 1);
     const size_t output_channels = static_cast<size_t>(W_.shape_[0]);
@@ -1507,6 +1520,149 @@ TEST(QLinearConvTest, Conv2D_S8S8_Requantize_Bias_PerChannel) {
   }
 }
 
+// Tests per-channel weight zero points with different values (the fix for the reported bug).
+TEST(QLinearConvTest, Conv2D_U8U8_PerChannelZeroPoints) {
+  // TODO: Unskip when fixed #41968513
+  if (DefaultDmlExecutionProvider().get() != nullptr) {
+    GTEST_SKIP() << "Skipping because of the following error: AbiCustomRegistry.cpp(507): The parameter is incorrect.";
+  }
+
+  for (int64_t channels : std::initializer_list<int64_t>{2, 4, 8, 16, 32}) {
+    QLinearConvOpTester<uint8_t, uint8_t> test;
+    test.GenerateRandomInput({1, 3, 9, 9}, .05f, 128);
+    test.GenerateRandomWeights({channels, 3, 3, 3}, .10f, 128);
+    std::vector<float> weight_scales;
+    std::vector<uint8_t> weight_zero_points;
+    for (int64_t i = 0; i < channels; i++) {
+      weight_scales.push_back(.10f + static_cast<float>(i) * .002f);
+      // Use different zero points per channel to exercise the per-channel path.
+      weight_zero_points.push_back(static_cast<uint8_t>(100 + i * 5));
+    }
+    test.SetWeightScales(weight_scales);
+    test.SetWeightZeroPoints(weight_zero_points);
+    test.GenerateRandomBias();
+    test.SetPads({1, 1, 1, 1});
+    test.SetOutputScaleAndZeroPoint(.55f, 128);
+    test.Run();
+  }
+}
+
+// Tests per-channel weight zero points with different values for int8 activations.
+TEST(QLinearConvTest, Conv2D_S8S8_PerChannelZeroPoints) {
+  // TODO: Unskip when fixed #41968513
+  if (DefaultDmlExecutionProvider().get() != nullptr) {
+    GTEST_SKIP() << "Skipping because of the following error: AbiCustomRegistry.cpp(507): The parameter is incorrect.";
+  }
+
+  for (int64_t channels : std::initializer_list<int64_t>{2, 4, 8, 16, 32}) {
+    QLinearConvOpTester<int8_t, int8_t> test;
+    test.GenerateRandomInput({1, 4, 7, 7}, .05f, 4);
+    test.GenerateRandomWeights({channels, 4, 3, 3}, .10f, 0);
+    std::vector<float> weight_scales;
+    std::vector<int8_t> weight_zero_points;
+    for (int64_t i = 0; i < channels; i++) {
+      weight_scales.push_back(.10f + static_cast<float>(i) * .003f);
+      // Use different (non-zero) zero points per channel.
+      weight_zero_points.push_back(static_cast<int8_t>(-10 + i * 3));
+    }
+    test.SetWeightScales(weight_scales);
+    test.SetWeightZeroPoints(weight_zero_points);
+    test.GenerateRandomBias();
+    test.SetPads({1, 1, 1, 1});
+    test.SetOutputScaleAndZeroPoint(.55f, -8);
+    test.Run();
+  }
+}
+
+// Tests per-channel weight zero points for grouped convolution.
+TEST(QLinearConvTest, Conv2D_U8U8_Groups_PerChannelZeroPoints) {
+  // TODO: Unskip when fixed #41968513
+  if (DefaultDmlExecutionProvider().get() != nullptr) {
+    GTEST_SKIP() << "Skipping because of the following error: AbiCustomRegistry.cpp(507): The parameter is incorrect.";
+  }
+
+  for (int64_t channels : std::initializer_list<int64_t>{4, 8, 16}) {
+    QLinearConvOpTester<uint8_t, uint8_t> test;
+    test.GenerateRandomInput({1, 8, 9, 9}, .05f, 128);
+    test.GenerateRandomWeights({channels, 4, 3, 3}, .10f, 128);
+    std::vector<float> weight_scales;
+    std::vector<uint8_t> weight_zero_points;
+    for (int64_t i = 0; i < channels; i++) {
+      weight_scales.push_back(.10f + static_cast<float>(i) * .002f);
+      weight_zero_points.push_back(static_cast<uint8_t>(80 + i * 7));
+    }
+    test.SetWeightScales(weight_scales);
+    test.SetWeightZeroPoints(weight_zero_points);
+    test.GenerateRandomBias();
+    test.SetPads({1, 1, 1, 1});
+    test.SetGroups(2);
+    test.SetOutputScaleAndZeroPoint(.55f, 128);
+    test.Run();
+  }
+}
+
+// Depthwise config (groups == channels) with non-uniform per-channel weight zero points.
+// The kernel cannot use MlasConvDepthwise with distinct ZPs, so this validates the
+// automatic fallback to the group-GEMM path.
+TEST(QLinearConvTest, Conv2D_S8S8_DepthwiseFallback_PerChannelZeroPoints) {
+  // TODO: Unskip when fixed #41968513
+  if (DefaultDmlExecutionProvider().get() != nullptr) {
+    GTEST_SKIP() << "Skipping because of the following error: AbiCustomRegistry.cpp(507): The parameter is incorrect.";
+  }
+
+  constexpr int64_t channels = 8;
+
+  QLinearConvOpTester<int8_t, int8_t> test;
+  test.GenerateRandomInput({1, channels, 9, 9}, .05f, 4);
+  test.GenerateRandomWeights({channels, 1, 3, 3}, .10f, 0);
+
+  std::vector<float> weight_scales;
+  std::vector<int8_t> weight_zero_points;
+  for (int64_t i = 0; i < channels; ++i) {
+    weight_scales.push_back(.10f + static_cast<float>(i) * .003f);
+    weight_zero_points.push_back(static_cast<int8_t>(-10 + i * 3));
+  }
+
+  test.SetWeightScales(weight_scales);
+  test.SetWeightZeroPoints(weight_zero_points);
+  test.GenerateRandomBias();
+  test.SetPads({1, 1, 1, 1});
+  test.SetGroups(channels);
+  test.SetOutputScaleAndZeroPoint(.55f, -8);
+  test.Run();
+}
+
+// Depthwise config (groups == channels) with a full per-channel zero-point tensor whose
+// values are all identical. This should still use the depthwise fast path because the
+// zero points are uniform even though the tensor shape is per-channel.
+TEST(QLinearConvTest, Conv2D_S8S8_Depthwise_PerChannelUniformZeroPoints) {
+  // TODO: Unskip when fixed #41968513
+  if (DefaultDmlExecutionProvider().get() != nullptr) {
+    GTEST_SKIP() << "Skipping because of the following error: AbiCustomRegistry.cpp(507): The parameter is incorrect.";
+  }
+
+  constexpr int64_t channels = 8;
+
+  QLinearConvOpTester<int8_t, int8_t> test;
+  test.GenerateRandomInput({1, channels, 9, 9}, .05f, 4);
+  test.GenerateRandomWeights({channels, 1, 3, 3}, .10f, 0);
+
+  std::vector<float> weight_scales;
+  std::vector<int8_t> weight_zero_points;
+  for (int64_t i = 0; i < channels; ++i) {
+    weight_scales.push_back(.10f + static_cast<float>(i) * .003f);
+    weight_zero_points.push_back(static_cast<int8_t>(-7));
+  }
+
+  test.SetWeightScales(weight_scales);
+  test.SetWeightZeroPoints(weight_zero_points);
+  test.GenerateRandomBias();
+  test.SetPads({1, 1, 1, 1});
+  test.SetGroups(channels);
+  test.SetOutputScaleAndZeroPoint(.55f, -8);
+  test.Run();
+}
+
 TEST(QLinearConvTest, Conv2D_S8S8_Depthwise_Kernelsize) {
   TestQLinearConv2dDepthwiseKernelsize<int8_t, int8_t>();
 }