Skip to content

Commit 53a65f7

Browse files
committed
LLM Behavior tests
1 parent ac980f6 commit 53a65f7

File tree

6 files changed

+233
-44
lines changed

6 files changed

+233
-44
lines changed

src/plugins/intel_npu/tests/functional/behavior/npuw/behavior_tests.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (C) 2024 Intel Corporation
1+
// Copyright (C) 2024-2025 Intel Corporation
22
// SPDX-License-Identifier: Apache-2.0
33
//
44

src/plugins/intel_npu/tests/functional/behavior/npuw/behavior_tests.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (C) 2024 Intel Corporation
1+
// Copyright (C) 2024-2025 Intel Corporation
22
// SPDX-License-Identifier: Apache-2.0
33
//
44
#pragma once
Lines changed: 22 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (C) 2024 Intel Corporation
1+
// Copyright (C) 2024-2025 Intel Corporation
22
// SPDX-License-Identifier: Apache-2.0
33
//
44

@@ -17,20 +17,11 @@ using namespace testing;
1717
using namespace ov::npuw::tests;
1818
using namespace ov::intel_npu::npuw;
1919

20-
21-
#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
22-
const char* cpu_plugin_file_name = "openvino_arm_cpu_plugin";
23-
#elif defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
24-
const char* cpu_plugin_file_name = "openvino_intel_cpu_plugin";
25-
#elif defined(OPENVINO_ARCH_RISCV64)
26-
const char* cpu_plugin_file_name = "openvino_riscv_cpu_plugin";
27-
#else
28-
# error "Undefined system processor"
29-
#endif
30-
3120
// These tests load the plugin by library name, which is not available with static linkage
3221
#ifndef OPENVINO_STATIC_LIBRARY
3322
#ifdef WITH_CPU_PLUGIN
23+
#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
24+
const char* cpu_plugin_file_name = "openvino_intel_cpu_plugin";
3425

3526
namespace {
3627
const std::vector<int64_t> What_is_OpenVINO =
@@ -43,12 +34,8 @@ using LLMTestParams = std::tuple<std::string, ov::AnyMap,
4334
std::vector<int64_t>, std::vector<int64_t>>;
4435
} // anonymous namespace
4536

46-
class LLMAccuracyTest : public ::testing::TestWithParam<LLMTestParams> {
37+
class LLMAccuracyTestsNPUW : public ::testing::TestWithParam<LLMTestParams> {
4738
public:
48-
ov::Core core;
49-
ov::AnyMap use_npuw_props;
50-
std::shared_ptr<ov::Model> model;
51-
5239
void SetUp() override {
5340
// NOTE: TEMPLATE plugin in OpenVINO takes ~20 minutes to generate
5441
// first token from prefill model and crashes on launch of
@@ -67,54 +54,47 @@ class LLMAccuracyTest : public ::testing::TestWithParam<LLMTestParams> {
6754

6855
auto param = GetParam();
6956
ov::AnyMap config;
70-
std::tie(m_model_path, config, m_input_ids, m_reference_ids) = param;
57+
std::tie(model_path, config, input_ids, reference_ids) = param;
7158
config["NPUW_DEVICES"] = "CPU";
7259
config["NPUW_LLM_MIN_RESPONSE_LEN"] = 4;
73-
m_simple_llm.initialize(m_model_path, core, config);
60+
simple_llm.initialize(model_path, core, config);
7461
}
7562

76-
void generate() {
77-
m_actual_ids = m_simple_llm.generate(m_input_ids);
78-
PRINT_ARRAY(m_actual_ids, m_actual_ids.size());
79-
}
80-
81-
void accurate() {
82-
for (auto i = 0; i < m_actual_ids.size(); ++i) {
83-
ASSERT_EQ(m_actual_ids[i], m_reference_ids[i]);
84-
}
85-
}
86-
87-
private:
88-
std::string m_model_path;
89-
SimpleLLMPipeline m_simple_llm;
90-
std::vector<int64_t> m_input_ids;
91-
std::vector<int64_t> m_actual_ids;
92-
std::vector<int64_t> m_reference_ids;
63+
protected:
64+
ov::Core core;
65+
SimpleLLMPipeline simple_llm;
66+
std::string model_path;
67+
std::vector<int64_t> input_ids;
68+
std::vector<int64_t> actual_ids;
69+
std::vector<int64_t> reference_ids;
9370
};
9471

95-
TEST_P(LLMAccuracyTest, ConfigIsAccurate) {
96-
generate();
97-
accurate();
72+
TEST_P(LLMAccuracyTestsNPUW, ConfigIsAccurate) {
73+
actual_ids = simple_llm.generate(input_ids);
74+
for (auto i = 0; i < actual_ids.size(); ++i) {
75+
ASSERT_EQ(actual_ids[i], reference_ids[i]);
76+
}
9877
}
9978

100-
INSTANTIATE_TEST_SUITE_P(LLMAccuracyNPUW_FAST_COMPILE, LLMAccuracyTest,
79+
INSTANTIATE_TEST_SUITE_P(LLMAccuracyNPUW_FAST_COMPILE, LLMAccuracyTestsNPUW,
10180
::testing::Combine(testing::Values("C:\\apronina\\models\\TinyLlama-1.1B-Chat-v1.0_int4_sym_group128_dyn_stateful\\TinyLlama-1.1B-Chat-v1.0_int4_sym_group128_dyn_stateful\\openvino_model.xml"),
10281
testing::Values(ov::AnyMap{}),
10382
testing::Values(What_is_OpenVINO),
10483
testing::Values(OpenVINO)));
10584

106-
INSTANTIATE_TEST_SUITE_P(LLMAccuracyNPUW_BEST_PERF, LLMAccuracyTest,
85+
INSTANTIATE_TEST_SUITE_P(LLMAccuracyNPUW_BEST_PERF, LLMAccuracyTestsNPUW,
10786
::testing::Combine(testing::Values("C:\\apronina\\models\\TinyLlama-1.1B-Chat-v1.0_int4_sym_group128_dyn_stateful\\TinyLlama-1.1B-Chat-v1.0_int4_sym_group128_dyn_stateful\\openvino_model.xml"),
10887
testing::Values(ov::AnyMap{{"NPUW_LLM_GENERATE_HINT", "BEST_PERF"}}),
10988
testing::Values(What_is_OpenVINO),
11089
testing::Values(OpenVINO)));
11190

112-
INSTANTIATE_TEST_SUITE_P(LLMAccuracyNPUW_DYNAMIC_BEST_PERF, LLMAccuracyTest,
91+
INSTANTIATE_TEST_SUITE_P(LLMAccuracyNPUW_DYNAMIC_BEST_PERF, LLMAccuracyTestsNPUW,
11392
::testing::Combine(testing::Values("C:\\apronina\\models\\TinyLlama-1.1B-Chat-v1.0_int4_sym_group128_dyn_stateful\\TinyLlama-1.1B-Chat-v1.0_int4_sym_group128_dyn_stateful\\openvino_model.xml"),
11493
testing::Values(ov::AnyMap{{"NPUW_LLM_GENERATE_HINT", "BEST_PERF"},
11594
{"NPUW_LLM_PREFILL_HINT", "DYNAMIC"}}),
11695
testing::Values(What_is_OpenVINO),
11796
testing::Values(OpenVINO)));
11897

98+
#endif // defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64)
11999
#endif // WITH_CPU_PLUGIN
120100
#endif // not OPENVINO_STATIC_LIBRARY
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
// Copyright (C) 2024-2025 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#include "test_engine/simple_llm_pipeline.hpp"
6+
#include "test_engine/mocks/mock_plugins.hpp"
7+
#include "test_engine/mocks/register_in_ov.hpp"
8+
#include "intel_npu/npuw_private_properties.hpp"
9+
#include "openvino/util/shared_object.hpp"
10+
#include "openvino/util/file_util.hpp"
11+
12+
#include <filesystem>
13+
14+
using namespace testing;
15+
using namespace ov::npuw::tests;
16+
using namespace ov::intel_npu::npuw;
17+
18+
namespace {
19+
const std::vector<int64_t> What_is_OpenVINO =
20+
{529, 29989, 1792, 29989, 29958, 13, 5618, 338, 4673, 29963, 1177,
21+
29949, 29973, 2, 29871, 13, 29966, 29989, 465, 22137, 29989, 29958, 13};
22+
} // anonymous namespace
23+
24+
#define TIMES(times) times
25+
26+
#define THROW(...) \
27+
.WillOnce(Throw(std::runtime_error(__VA_ARGS__)))
28+
29+
#define EXPECT_COMPILE_MODEL(device, times, ...) \
30+
EXPECT_CALL(*device##_plugin, \
31+
compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)) \
32+
.Times(times) \
33+
__VA_ARGS__ \
34+
35+
#define MODEL(idx) idx
36+
37+
#define INFER_REQ(idx) idx
38+
39+
#define EXPECT_CREATE_SYNC_INFER_REQ(device, model_idx, times, ...) \
40+
device##_plugin->set_expectations_to_comp_models(model_idx, [](MockCompiledModel& model) { \
41+
EXPECT_CALL(model, create_sync_infer_request()) \
42+
.Times(times) \
43+
__VA_ARGS__; \
44+
})
45+
46+
#define EXPECT_INFER(device, model_idx, times, ...) \
47+
device##_plugin->set_expectations_to_infer_reqs(model_idx, 0, [](MockInferRequest& request) { \
48+
EXPECT_CALL(request, infer()) \
49+
.Times(times) \
50+
__VA_ARGS__; \
51+
});
52+
53+
#define EXPECT_INFER_FOR(device, model_idx, req_idx, times, ...) \
54+
device##_plugin->set_expectations_to_infer_reqs(model_idx, req_idx, [](MockInferRequest& request) { \
55+
EXPECT_CALL(request, infer()) \
56+
.Times(times) \
57+
__VA_ARGS__; \
58+
});
59+
60+
namespace ov {
61+
namespace npuw {
62+
namespace tests {
63+
64+
class LLMBehaviorTestsNPUW : public ::testing::Test {
65+
public:
66+
void SetUp() override {
67+
mock_npu_for_prefill_plugin = std::make_shared<MockNpuPluginForPrefill>();
68+
mock_npu_for_prefill_plugin->create_implementation();
69+
mock_npu_for_generate_plugin = std::make_shared<MockNpuPluginForGenerate>();
70+
mock_npu_for_generate_plugin->create_implementation();
71+
config["++NPUW_LLM_PREFILL_CONFIG"] = ov::AnyMap{{"NPUW_DEVICES", "MockNPUForPrefill"}};
72+
config["++NPUW_LLM_GENERATE_CONFIG"] = ov::AnyMap{{"NPUW_DEVICES", "MockNPUForGenerate"}};
73+
config["NPUW_LLM_MIN_RESPONSE_LEN"] = 2;
74+
}
75+
76+
// Make sure it is called after expectations are set!
77+
void register_mock_plugins_in_ov() {
78+
m_shared_objects.push_back(reg_plugin<MockNpuPluginForPrefill>(core, mock_npu_for_prefill_plugin));
79+
m_shared_objects.push_back(reg_plugin<MockNpuPluginForGenerate>(core, mock_npu_for_generate_plugin));
80+
}
81+
82+
protected:
83+
ov::Core core;
84+
SimpleLLMPipeline simple_llm;
85+
std::shared_ptr<MockNpuPluginForPrefill> mock_npu_for_prefill_plugin;
86+
std::shared_ptr<MockNpuPluginForGenerate> mock_npu_for_generate_plugin;
87+
ov::AnyMap config;
88+
89+
private:
90+
std::vector<std::shared_ptr<void>> m_shared_objects;
91+
};
92+
} // namespace tests
93+
} // namespace npuw
94+
} // namespace ov
95+
96+
TEST_F(LLMBehaviorTestsNPUW, LLMBehaviorNPUW_FAST_COMPILE) {
97+
// Set expectations first:
98+
{
99+
InSequence seq;
100+
// Generate model:
101+
EXPECT_COMPILE_MODEL(mock_npu_for_generate, TIMES(3));
102+
// Prefill model:
103+
EXPECT_COMPILE_MODEL(mock_npu_for_prefill, TIMES(3));
104+
}
105+
106+
// ------------------------ Prefill model ---------------------------
107+
// 1 infer request for head:
108+
EXPECT_CREATE_SYNC_INFER_REQ(mock_npu_for_prefill, MODEL(0), TIMES(1));
109+
// 2 infer requests for function, `create_sync_infer_request()`
110+
// should be called twice here (2 requests to form pipelining):
111+
EXPECT_CREATE_SYNC_INFER_REQ(mock_npu_for_prefill, MODEL(1), TIMES(2));
112+
// 1 infer request for tail:
113+
EXPECT_CREATE_SYNC_INFER_REQ(mock_npu_for_prefill, MODEL(2), TIMES(1));
114+
115+
// Head's infer request is called once:
116+
EXPECT_INFER(mock_npu_for_prefill, MODEL(0), TIMES(1));
117+
// There are 20 repeated functions, so:
118+
// Repeated block's 1st infer request is called 10 times:
119+
EXPECT_INFER_FOR(mock_npu_for_prefill, MODEL(1), INFER_REQ(0), TIMES(10));
120+
// Repeated block's 2nd infer request (sibling of the 1st one) is called 10 times:
121+
EXPECT_INFER_FOR(mock_npu_for_prefill, MODEL(1), INFER_REQ(1), TIMES(10));
122+
// Tail's infer request is called once:
123+
EXPECT_INFER(mock_npu_for_prefill, MODEL(2), TIMES(1));
124+
125+
// ------------------------ Generate model ---------------------------
126+
// 1 infer request for head:
127+
EXPECT_CREATE_SYNC_INFER_REQ(mock_npu_for_generate, MODEL(0), TIMES(1));
128+
// 20 infer requests for function, `create_sync_infer_request()`
129+
// should be called 20 times here (due to UNFOLD_IREQS):
130+
EXPECT_CREATE_SYNC_INFER_REQ(mock_npu_for_generate, MODEL(1), TIMES(20));
131+
// 1 infer request for tail:
132+
EXPECT_CREATE_SYNC_INFER_REQ(mock_npu_for_generate, MODEL(2), TIMES(1));
133+
134+
// Head's infer request is called once:
135+
EXPECT_INFER(mock_npu_for_generate, MODEL(0), TIMES(1));
136+
// Each of the 20 different infer requests of the 2nd submodel should be called once:
137+
for (int i = 0; i < 20; ++i) {
138+
EXPECT_INFER_FOR(mock_npu_for_generate, MODEL(1), INFER_REQ(i), TIMES(1));
139+
}
140+
// Tail's infer request is called once:
141+
EXPECT_INFER(mock_npu_for_generate, MODEL(2), TIMES(1));
142+
143+
// Register mock objects as plugins in OpenVINO:
144+
register_mock_plugins_in_ov();
145+
146+
// Do the actual test:
147+
simple_llm.initialize("C:\\apronina\\models\\TinyLlama-1.1B-Chat-v1.0_int4_sym_group128_dyn_stateful\\TinyLlama-1.1B-Chat-v1.0_int4_sym_group128_dyn_stateful\\openvino_model.xml",
148+
core, config);
149+
EXPECT_NO_THROW(simple_llm.generate(What_is_OpenVINO));
150+
}
151+
152+
TEST_F(LLMBehaviorTestsNPUW, LLMBehaviorNPUW_BEST_PERF) {
153+
// Set expectations first:
154+
{
155+
InSequence seq;
156+
// Generate model:
157+
EXPECT_COMPILE_MODEL(mock_npu_for_generate, TIMES(1));
158+
// Prefill model:
159+
EXPECT_COMPILE_MODEL(mock_npu_for_prefill, TIMES(3));
160+
}
161+
162+
// ------------------------ Prefill model ---------------------------
163+
// 1 infer request for head:
164+
EXPECT_CREATE_SYNC_INFER_REQ(mock_npu_for_prefill, MODEL(0), TIMES(1));
165+
// 2 infer requests for function, `create_sync_infer_request()`
166+
// should be called twice here (2 requests to form pipelining):
167+
EXPECT_CREATE_SYNC_INFER_REQ(mock_npu_for_prefill, MODEL(1), TIMES(2));
168+
// 1 infer request for tail:
169+
EXPECT_CREATE_SYNC_INFER_REQ(mock_npu_for_prefill, MODEL(2), TIMES(1));
170+
171+
// Head's infer request is called once:
172+
EXPECT_INFER(mock_npu_for_prefill, MODEL(0), TIMES(1));
173+
// There are 20 repeated functions, so:
174+
// Repeated block's 1st infer request is called 10 times:
175+
EXPECT_INFER_FOR(mock_npu_for_prefill, MODEL(1), INFER_REQ(0), TIMES(10));
176+
// Repeated block's 2nd infer request (sibling of the 1st one) is called 10 times:
177+
EXPECT_INFER_FOR(mock_npu_for_prefill, MODEL(1), INFER_REQ(1), TIMES(10));
178+
// Tail's infer request is called once:
179+
EXPECT_INFER(mock_npu_for_prefill, MODEL(2), TIMES(1));
180+
181+
// ------------------------ Generate model ---------------------------
182+
// 1 infer request for the whole generate model:
183+
EXPECT_CREATE_SYNC_INFER_REQ(mock_npu_for_generate, MODEL(0), TIMES(1));
184+
// Infer request is called only once:
185+
EXPECT_INFER(mock_npu_for_generate, MODEL(0), TIMES(1));
186+
187+
// Register mock objects as plugins in OpenVINO:
188+
register_mock_plugins_in_ov();
189+
190+
// Do the actual test:
191+
192+
config["NPUW_LLM_GENERATE_HINT"] = "BEST_PERF";
193+
simple_llm.initialize("C:\\apronina\\models\\TinyLlama-1.1B-Chat-v1.0_int4_sym_group128_dyn_stateful\\TinyLlama-1.1B-Chat-v1.0_int4_sym_group128_dyn_stateful\\openvino_model.xml",
194+
core, config);
195+
EXPECT_NO_THROW(simple_llm.generate(What_is_OpenVINO));
196+
}
197+

src/plugins/intel_npu/tests/functional/behavior/npuw/test_engine/mocks/mock_plugins.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,8 @@ MockPluginBase<DeviceType>::~MockPluginBase() {
303303
}
304304

305305
template class MockPluginBase<ov::npuw::tests::mocks::Npu>;
306+
template class MockPluginBase<ov::npuw::tests::mocks::NpuForPrefill>;
307+
template class MockPluginBase<ov::npuw::tests::mocks::NpuForGenerate>;
306308
template class MockPluginBase<ov::npuw::tests::mocks::Cpu>;
307309

308310
} // namespace tests

src/plugins/intel_npu/tests/functional/behavior/npuw/test_engine/mocks/mock_plugins.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,13 +159,23 @@ struct Npu {
159159
static constexpr const char* name = "MockNPU";
160160
static constexpr int num_device_ids = 3;
161161
};
162+
struct NpuForPrefill {
163+
static constexpr const char* name = "MockNPUForPrefill";
164+
static constexpr int num_device_ids = 3;
165+
};
166+
struct NpuForGenerate {
167+
static constexpr const char* name = "MockNPUForGenerate";
168+
static constexpr int num_device_ids = 3;
169+
};
162170
struct Cpu {
163171
static constexpr const char* name = "MockCPU";
164172
static constexpr int num_device_ids = 2;
165173
};
166174
} //namespace mocks
167175

168176
using MockNpuPlugin = testing::NiceMock<MockPluginBase<mocks::Npu>>;
177+
using MockNpuPluginForPrefill = testing::NiceMock<MockPluginBase<mocks::NpuForPrefill>>;
178+
using MockNpuPluginForGenerate = testing::NiceMock<MockPluginBase<mocks::NpuForGenerate>>;
169179
using MockCpuPlugin = testing::NiceMock<MockPluginBase<mocks::Cpu>>;
170180

171181
} // namespace tests

0 commit comments

Comments
 (0)