[GPU] sink reshape for reorder+reshape+permute pattern opt (openvinotoolkit#28183)

### Details:
- When looking at yolov6s, there are quite a lot of
conv+reorder+reshape+permute patterns, as shown below:

![image](https://github.com/user-attachments/assets/6c4706c8-cada-4761-977f-9e36161add0e)
convolution ([N,W,C,H], byxf) -> reorder ([N,W,C,H], bfyx) -> reshape
([N,W,CxH], bfyx) -> permute with order (0,2,1) to ([N,CxH,W], bfyx), which
is an equivalent/alias of ([N,W,C,H], byxf), so this execution chain can be
optimized.
- In this PR, we sink the reshape to after the permute where applicable, so
that existing optimizations such as reorder and conv+permute fusion can be
leveraged to achieve the overall optimization (see the sketch below).
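
To make the rewrite concrete, here is a minimal sketch of the equivalence being exploited, built with standard OpenVINO opset1 ops. The shapes follow the unit test in this PR, and the `before_subgraph`/`after_subgraph` names are illustrative, not part of the commit:

```cpp
#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/opsets/opset1.hpp"

// Before: Reshape [2,4,10,10] -> [2,4,100], then Transpose(0,2,1) -> [2,100,4]
std::shared_ptr<ov::Model> before_subgraph() {
    auto in = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{2, 4, 10, 10});
    auto reshape_const = ov::opset1::Constant::create(ov::element::i32, {3}, std::vector<int>{2, 4, 100});
    auto reshape = std::make_shared<ov::opset1::Reshape>(in, reshape_const, true);
    auto order = ov::opset1::Constant::create(ov::element::i32, {3}, std::vector<int>{0, 2, 1});
    auto transpose = std::make_shared<ov::opset1::Transpose>(reshape, order);
    return std::make_shared<ov::Model>(ov::NodeVector{transpose}, ov::ParameterVector{in});
}

// After sinking: Transpose(0,2,3,1) -> [2,10,10,4], then Reshape -> [2,100,4].
// The transpose now sits directly on the 4D conv output, where it can be fused.
std::shared_ptr<ov::Model> after_subgraph() {
    auto in = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{2, 4, 10, 10});
    auto order = ov::opset1::Constant::create(ov::element::i32, {4}, std::vector<int>{0, 2, 3, 1});
    auto transpose = std::make_shared<ov::opset1::Transpose>(in, order);
    auto reshape_const = ov::opset1::Constant::create(ov::element::i32, {3}, std::vector<int>{2, 100, 4});
    auto reshape = std::make_shared<ov::opset1::Reshape>(transpose, reshape_const, true);
    return std::make_shared<ov::Model>(ov::NodeVector{reshape}, ov::ParameterVector{in});
}
```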

### Tickets:
- CVS-159840

---------

Signed-off-by: fishbell <[email protected]>
Co-authored-by: River Li <[email protected]>
Co-authored-by: Chen Peter <[email protected]>
3 people authored Feb 11, 2025
1 parent f47bfc2 commit a5f7ec3
Showing 4 changed files with 312 additions and 0 deletions.
165 changes: 165 additions & 0 deletions src/plugins/intel_gpu/src/plugin/transformations/sink_reshape.cpp
@@ -0,0 +1,165 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "sink_reshape.hpp"

#include "intel_gpu/op/convolution.hpp"
#include "openvino/core/rt_info.hpp"
#include "openvino/opsets/opset1.hpp"
#include "openvino/pass/pattern/op/or.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "transformations/utils/utils.hpp"

namespace ov {
namespace intel_gpu {

SinkReshape::SinkReshape() {
    using namespace ov::pass::pattern;
    using ov::pass::pattern::op::Or;
    using namespace ov::op;

    auto reshape_predicate = [](const ov::Output<ov::Node>& output) -> bool {
        auto supported_conv_act_post_ops_for_fuse = [](const std::shared_ptr<const Node>& node) -> bool {
            return ov::is_type<v0::Relu>(node) || ov::is_type<v0::Elu>(node) || ov::is_type<v0::Sigmoid>(node) ||
                   ov::is_type<v5::HSigmoid>(node) || ov::is_type<v0::Clamp>(node) || ov::is_type<v4::Swish>(node) ||
                   ov::is_type<v4::HSwish>(node) || ov::is_type<v4::Mish>(node) || ov::is_type<v5::Round>(node);
        };
        auto supported_conv_eltwise_post_ops_for_fuse = [](const std::shared_ptr<const Node>& node) -> bool {
            if (ov::is_type<v1::Add>(node) || ov::is_type<v1::Subtract>(node) || ov::is_type<v1::Multiply>(node) ||
                ov::is_type<v1::Divide>(node))
                return std::dynamic_pointer_cast<v0::Constant>(node->get_input_node_shared_ptr(1)) != nullptr;
            return ov::is_type<v0::Exp>(node);
        };
        std::function<bool(const std::shared_ptr<ov::Node>&)> is_suitable_parent;
        is_suitable_parent = [&](const std::shared_ptr<ov::Node>& node) -> bool {
            if (node->get_users().size() != 1 || node->is_dynamic())
                return false;
            if (ov::as_type_ptr<op::Convolution>(node))
                return true;
            for (size_t idx = 0; idx < node->get_input_size(); idx++) {
                auto input = node->get_input_node_shared_ptr(idx);
                if (ov::as_type_ptr<v0::Constant>(input))
                    continue;
                if (supported_conv_eltwise_post_ops_for_fuse(node)) {
                    return is_suitable_parent(input);
                } else if (supported_conv_act_post_ops_for_fuse(node)) {
                    return is_suitable_parent(input);
                }
                return false;
            }
            return false;
        };
        // reshape is supported in only one case: two consecutive input dims are merged into one output dim
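        // e.g. [1,3,4,6] -> [1,3,24] (dims 2 and 3 merged) is accepted,
        // while [2,4,10,10] -> [2,2,20] is rejected (more than one output dim changes)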
        auto is_suitable_reshape = [](const std::shared_ptr<ov::Node>& node) -> bool {
            if (node->is_dynamic())
                return false;
            auto& in_ps = node->get_input_partial_shape(0);
            auto& out_ps = node->get_output_partial_shape(0);
            if (in_ps.size() - out_ps.size() != 1)
                return false;
            size_t mismatch_count = 0;
            for (size_t i = 0; i < out_ps.size(); ++i) {
                if (i + mismatch_count >= in_ps.size())
                    return false;
                if (out_ps[i] != in_ps[i + mismatch_count]) {
                    mismatch_count++;
                }
            }
            return mismatch_count == 1;
        };
        const auto reshape = ov::as_type_ptr<v1::Reshape>(output.get_node_shared_ptr());
        return is_suitable_reshape(reshape) && is_suitable_parent(reshape->get_input_node_shared_ptr(0));
    };

    auto reshape_m = wrap_type<v1::Reshape>(reshape_predicate);
    auto transpose_const_m = wrap_type<v0::Constant>();
    auto transpose_m = wrap_type<v1::Transpose>({reshape_m, transpose_const_m});

    ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher& m) {
        const auto& pattern_map = m.get_pattern_value_map();
        auto reshape = std::dynamic_pointer_cast<v1::Reshape>(pattern_map.at(reshape_m).get_node_shared_ptr());
        if (!reshape || transformation_callback(reshape)) {
            return false;
        }

        auto update_order = [](std::vector<uint16_t> original_order, const std::shared_ptr<v1::Reshape>& reshape_node) {
            // Example: in this sequence the Reshape node merges 2 consecutive dims into one,
            // so the order must be updated as if the permute were done before the reshape:
            // [1,3,4,6] -> Reshape [1,3,24] -> permute(0,2,1) -> [1,24,3]
            // the updated order must be (0,2,3,1):
            // the dim with index=2 is split into 2 parts: 2 and 3
            auto reshape_in_shape = reshape_node->get_input_partial_shape(0).to_shape();
            auto reshape_out_shape = reshape_node->get_output_partial_shape(0).to_shape();
            auto transformed_order = original_order;
            const uint16_t merge_dim_idx = [&]() {
                for (uint16_t i = 0; i < reshape_out_shape.size(); ++i) {
                    if (reshape_in_shape[i] != reshape_out_shape[i])
                        return i;
                }
                OPENVINO_THROW("same input/output for reshape node");
            }();
            auto insert_it = transformed_order.end();
            for (auto it = transformed_order.begin(); it != transformed_order.end(); ++it) {
                auto& elem = *it;
                if (elem > merge_dim_idx) {
                    elem++;
                } else if (elem == merge_dim_idx) {
                    insert_it = it + 1;
                }
            }
            transformed_order.insert(insert_it, merge_dim_idx + 1);
            return transformed_order;
        };

        // allow transposes that rotate the feature dim to the back, making it the innermost axis
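        // i.e. the order must be (0, 2, 3, ..., n-1, 1), e.g. (0, 2, 1) for rank 3 or (0, 2, 3, 1) for rank 4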
        auto check_transpose_order = [](std::vector<uint16_t>& order) -> bool {
            if (order.size() <= 2)
                return false;
            if ((int32_t)order[order.size() - 2] != (int32_t)order.size() - 1)
                return false;
            if ((int32_t)order[0] != 0)
                return false;
            for (int32_t i = 2; i < (int32_t)order.size(); ++i) {
                if ((int32_t)order[i - 1] != i)
                    return false;
            }
            return true;
        };

        auto transpose = std::dynamic_pointer_cast<v1::Transpose>(pattern_map.at(transpose_m).get_node_shared_ptr());
        if (pattern_map.count(transpose_const_m) > 0) {
            auto org_transpose_const = pattern_map.at(transpose_const_m).get_node_shared_ptr();
            auto org_transpose_os = transpose->get_output_shape(0);
            auto transpose_order = std::dynamic_pointer_cast<v0::Constant>(org_transpose_const);
            auto updated_order = update_order(transpose_order->cast_vector<uint16_t>(), reshape);
            if (check_transpose_order(updated_order)) {
                auto updated_transpose_order = std::make_shared<v0::Constant>(transpose_order->get_element_type(),
                                                                              ov::Shape(1, updated_order.size()),
                                                                              updated_order);
                updated_transpose_order->set_friendly_name(transpose_order->get_friendly_name() + "_updated");
                auto new_transpose =
                    std::make_shared<v1::Transpose>(reshape->input(0).get_source_output(), updated_transpose_order);
                new_transpose->set_friendly_name(transpose->get_friendly_name() + "_with_updated_order");
                copy_runtime_info(transpose, new_transpose);
                ov::replace_node(reshape, new_transpose);
                auto new_pattern_const = std::make_shared<ov::op::v0::Constant>(ov::element::i32,
                                                                                ov::Shape{org_transpose_os.size()},
                                                                                org_transpose_os);
                auto new_reshape = std::make_shared<ov::op::v1::Reshape>(new_transpose,
                                                                         new_pattern_const,
                                                                         reshape->get_special_zero());
                new_reshape->set_friendly_name(reshape->get_friendly_name() + "_sinked_after_transpose");
                copy_runtime_info(reshape, new_reshape);
                ov::replace_node(transpose, new_reshape);
            }
        }
        return true;
    };
    auto m = std::make_shared<ov::pass::pattern::Matcher>(transpose_m, "SinkReshapeIfNeeded");
    this->register_matcher(m, callback);
}
} // namespace intel_gpu
} // namespace ov
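
For intuition, the order-update math above can be exercised in isolation. Below is a minimal standalone sketch (a hypothetical re-implementation, not part of the commit) of the `update_order` logic under the same merge-one-dim assumption:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Given a permute order on the reshaped (rank-1-smaller) tensor and the index
// of the merged dim, produce the equivalent order on the unreshaped tensor.
std::vector<uint16_t> update_order(std::vector<uint16_t> order, uint16_t merge_dim_idx) {
    auto insert_it = order.end();
    for (auto it = order.begin(); it != order.end(); ++it) {
        if (*it > merge_dim_idx) {
            ++*it;               // dims after the merged one shift right by 1
        } else if (*it == merge_dim_idx) {
            insert_it = it + 1;  // the merged dim splits into (idx, idx + 1)
        }
    }
    order.insert(insert_it, merge_dim_idx + 1);
    return order;
}

int main() {
    // [1,3,4,6] -> Reshape [1,3,24] -> Transpose(0,2,1): the merged dim index
    // is 2, so the equivalent pre-reshape order is (0,2,3,1).
    for (auto d : update_order({0, 2, 1}, 2))
        std::cout << d << ' ';   // prints: 0 2 3 1
    std::cout << '\n';
}
```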
19 changes: 19 additions & 0 deletions src/plugins/intel_gpu/src/plugin/transformations/sink_reshape.hpp
@@ -0,0 +1,19 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/pass/graph_rewrite.hpp"

namespace ov {
namespace intel_gpu {

class SinkReshape : public ov::pass::MatcherPass {
public:
    OPENVINO_MATCHER_PASS_RTTI("SinkReshapeIfNeeded");
    SinkReshape();
};

} // namespace intel_gpu
} // namespace ov
3 changes: 3 additions & 0 deletions src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -86,6 +86,7 @@
#include "plugin/transformations/dynamic_quantize_fully_connected.hpp"
#include "plugin/transformations/optimize_subsequent_reshapes.hpp"
#include "plugin/transformations/lora_horizontal_fusion.hpp"
#include "plugin/transformations/sink_reshape.hpp"
#include "transformations/common_optimizations/nop_elimination.hpp"
#include "transformations/common_optimizations/rms_fusion.hpp"
#include "transformations/common_optimizations/broadcast_elementwise_fusion.hpp"
@@ -1097,6 +1098,8 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
    // This Validate is needed for proper data type propagation after applying IncreasePositionIdsPrecision pass
    manager.register_pass<ov::pass::Validate>();

    manager.register_pass<ov::intel_gpu::SinkReshape>();

    if (device_info.supports_immad) {
        auto dynamic_quantization_group_size = config.get_property(ov::hint::dynamic_quantization_group_size);
        pass_config->set_callback<ov::intel_gpu::DynamicQuantizeFullyConnected>([=](const_node_ptr& root) -> bool {
125 changes: 125 additions & 0 deletions src/plugins/intel_gpu/tests/unit/transformations/sink_reshape_test.cpp
@@ -0,0 +1,125 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>

#include <string>
#include <memory>

#include <openvino/core/model.hpp>
#include <openvino/opsets/opset1.hpp>
#include "openvino/op/softmax.hpp"
#include <transformations/init_node_info.hpp>
#include <transformations/utils/utils.hpp>
#include "plugin/transformations/sink_reshape.hpp"
#include "plugin/transformations/convert_convolution.hpp"
#include "common_test_utils/ov_test_utils.hpp"

using namespace testing;
using namespace ov::intel_gpu;

using SinkReshapeParams = std::tuple<bool,   // add eltwise
                                     bool,   // add activation
                                     bool,   // eligible rotation
                                     bool>;  // eligible reshape

class SinkReshapeTests : public TransformationTestsF, public WithParamInterface<SinkReshapeParams> {
public:
    static std::string get_test_case_name(testing::TestParamInfo<SinkReshapeParams> obj) {
        bool add_eltwise;
        bool add_activation;
        bool eligible_rotation;
        bool eligible_reshape;
        std::tie(add_eltwise, add_activation, eligible_rotation, eligible_reshape) = obj.param;

        std::ostringstream result;
        result << "add_eltwise=" << add_eltwise << "_add_activation=" << add_activation
               << "_eligible_rotation=" << eligible_rotation << "_eligible_reshape=" << eligible_reshape;
        return result.str();
    }

    static std::shared_ptr<ov::Model> init_model(const bool add_eltwise,
                                                 const bool add_activation,
                                                 const bool eligible_rotation,
                                                 const bool eligible_reshape,
                                                 const bool ref) {
        ov::Strides strides{1, 1};
        ov::Strides dilations{1, 1};
        ov::CoordinateDiff pads_begin{0, 0};
        ov::CoordinateDiff pads_end{0, 0};
        auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{2, 3, 12, 12});
        auto weights_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{4, 3, 3, 3}, {1});
        auto conv = std::make_shared<ov::op::v1::Convolution>(input,
                                                              weights_const,
                                                              strides,
                                                              pads_begin,
                                                              pads_end,
                                                              dilations,
                                                              ov::op::PadType::EXPLICIT);
        std::shared_ptr<ov::Node> reshape_input_node = conv;
        if (add_eltwise) {
            auto sub_const = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1}, {1});
            reshape_input_node = std::make_shared<ov::opset1::Subtract>(reshape_input_node, sub_const);
        }

        if (add_activation) {
            reshape_input_node = std::make_shared<ov::opset1::Sigmoid>(reshape_input_node);
        }
        std::shared_ptr<ov::Model> model = nullptr;
        if (!ref) {
            auto shape = eligible_reshape ? std::vector<int>{2, 4, 100} : std::vector<int>{2, 2, 20};
            auto reshape_const = ov::opset1::Constant::create(ov::element::i32, {3}, shape);
            auto reshape = std::make_shared<ov::opset1::Reshape>(reshape_input_node, reshape_const, true);
            auto order = eligible_rotation ? std::vector<int>{0, 2, 1} : std::vector<int>{2, 1, 0};
            auto transpose_const = ov::opset1::Constant::create(ov::element::i32, {3}, order);
            auto transpose = std::make_shared<ov::opset1::Transpose>(reshape, transpose_const);

            auto softmax = std::make_shared<ov::op::v8::Softmax>(transpose);
            model = std::make_shared<ov::Model>(ov::NodeVector{softmax}, ov::ParameterVector{input});
        } else {
            auto transpose_const = ov::opset1::Constant::create(ov::element::i32, {4}, {0, 2, 3, 1});
            auto transpose = std::make_shared<ov::opset1::Transpose>(reshape_input_node, transpose_const);
            auto reshape_const = ov::opset1::Constant::create(ov::element::i32, {3}, {2, 100, 4});
            auto reshape = std::make_shared<ov::opset1::Reshape>(transpose, reshape_const, true);
            auto softmax = std::make_shared<ov::op::v8::Softmax>(reshape);
            model = std::make_shared<ov::Model>(ov::NodeVector{softmax}, ov::ParameterVector{input});
        }
        ov::pass::Manager manager;
        manager.register_pass<ConvertConvolutionToInternal>();
        if (!ref)
            manager.register_pass<SinkReshape>();
        manager.run_passes(model);
        return model;
    }

protected:
    void SetUp() override {
        TransformationTestsF::SetUp();
        bool add_eltwise;
        bool add_activation;
        bool eligible_rotation;
        bool eligible_reshape;
        std::tie(add_eltwise, add_activation, eligible_rotation, eligible_reshape) = this->GetParam();

        model = init_model(add_eltwise, add_activation, eligible_rotation, eligible_reshape, true);
        if (!eligible_rotation || !eligible_reshape)
            model_ref = model->clone();
        else
            model_ref = init_model(add_eltwise, add_activation, eligible_rotation, eligible_reshape, false);
    }
};
};

TEST_P(SinkReshapeTests, CompareFunctions) {}

const std::vector<bool> add_eltwise = {false, true};
const std::vector<bool> add_activation = {false, true};
const std::vector<bool> eligible_rotation = {false, true};
const std::vector<bool> eligible_reshape = {false, true};

INSTANTIATE_TEST_SUITE_P(smoke_TransformationTests_reshape_transpose,
                         SinkReshapeTests,
                         ::testing::Combine(::testing::ValuesIn(add_eltwise),
                                            ::testing::ValuesIn(add_activation),
                                            ::testing::ValuesIn(eligible_rotation),
                                            ::testing::ValuesIn(eligible_reshape)),
                         SinkReshapeTests::get_test_case_name);
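
These cases should be runnable with the GPU plugin's unit-test binary (assuming the usual `ov_gpu_unit_tests` target) via `--gtest_filter=smoke_TransformationTests_reshape_transpose*`.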
