Skip to content

Commit c9f19d2

Browse files
committed
[onert] Implement BulkPipelineModel for Trix model execution
This implements a new BulkPipelineModel class to handle NPU model loading. ONE-DCO-1.0-Signed-off-by: Jonghwa Lee <[email protected]>
1 parent d84669e commit c9f19d2

File tree

3 files changed

+501
-0
lines changed

3 files changed

+501
-0
lines changed
Lines changed: 281 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,281 @@
1+
/*
2+
* Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include "BulkPipelineModel.h"
18+
19+
#include <iostream>
20+
#include <cstring>
21+
#include <algorithm>
22+
23+
namespace onert
24+
{
25+
namespace backend
26+
{
27+
namespace trix
28+
{
29+
namespace ops
30+
{
31+
32+
// Constructs a wrapper for the given model file on the given NPU device.
// No I/O or device access happens here; call initialize() then prepare().
//
// _dev is explicitly null-initialized: the header declares it without an
// initializer, so teardown paths (release() -> unregisterModel()/closeDevice())
// would otherwise test an indeterminate pointer when prepare() never ran.
BulkPipelineModel::BulkPipelineModel(const std::string &model_path, int device_id)
  : _model_path(model_path), _device_id(device_id), _dev(nullptr)
{
  // DO NOTHING
}
37+
38+
// Releases any device/model/buffer state still held (see release()).
BulkPipelineModel::~BulkPipelineModel() { release(); }
39+
40+
bool BulkPipelineModel::initialize()
41+
{
42+
if (_initialized.load())
43+
{
44+
return true;
45+
}
46+
47+
if (!loadMetadata())
48+
{
49+
return false;
50+
}
51+
52+
_initialized = true;
53+
return true;
54+
}
55+
56+
// Opens the NPU device, allocates and fills the program/weight buffers from
// the model file, and registers the model with the driver. Returns true when
// the model is ready to run; false (after best-effort cleanup) on failure.
// Requires initialize() to have succeeded (it populates _meta). Idempotent.
bool BulkPipelineModel::prepare()
{
  if (_prepared.load())
  {
    return true;
  }

  try
  {
    // Order matters: the device must be open before registerModel() can talk
    // to the driver, and the buffers must be filled before they are handed
    // over; markBufferReady() wakes threads blocked in waitForBufferReady().
    openDevice();
    allocateBuffers();
    fillBuffers();
    markBufferReady();
    registerModel();

    _prepared = true;
    return true;
  }
  catch (const std::exception &e)
  {
    std::cerr << "Failed to prepare model " << _model_path << ": " << e.what() << std::endl;
    // NOTE(review): release() is expected to undo the partial setup done
    // above; verify it actually closes the device when preparation fails
    // midway (before _prepared is set).
    release();
    return false;
  }
}
81+
82+
void BulkPipelineModel::release()
83+
{
84+
if (!_prepared.load())
85+
{
86+
return;
87+
}
88+
89+
unregisterModel();
90+
closeDevice();
91+
92+
if (_fp)
93+
{
94+
fclose(_fp);
95+
_fp = nullptr;
96+
}
97+
98+
_program_buffer.reset();
99+
_weight_buffer.reset();
100+
_meta.reset();
101+
_meta_size = 0;
102+
_model_id = 0;
103+
104+
_prepared = false;
105+
}
106+
107+
// Runs one blocking inference on the NPU using the registered model.
//
// @param inputs  one tensor per model input segment, in metadata order
// @param outputs one tensor per model output segment, in metadata order
// @throws std::runtime_error when the model is not prepared, metadata is
//         missing, the tensor counts don't cover the metadata's segment
//         counts, or the driver reports a failure.
void BulkPipelineModel::run(const std::vector<const IPortableTensor *> &inputs,
                            std::vector<IPortableTensor *> &outputs)
{
  if (!_prepared.load())
  {
    throw std::runtime_error("Model is not prepared: " + _model_path);
  }

  if (!_meta)
  {
    throw std::runtime_error("Model metadata is not loaded: " + _model_path);
  }

  // Validate counts up front so the indexing below cannot read past the end
  // of the caller's vectors.
  // TODO(review): also confirm input_seg_num/output_seg_num can never exceed
  // the capacity of the fixed bufs[] arrays in input_buffers/output_buffers.
  if (inputs.size() < _meta->input_seg_num)
  {
    throw std::runtime_error("Too few input tensors for " + _model_path + ": got " +
                             std::to_string(inputs.size()) + ", need " +
                             std::to_string(_meta->input_seg_num));
  }
  if (outputs.size() < _meta->output_seg_num)
  {
    throw std::runtime_error("Too few output tensors for " + _model_path + ": got " +
                             std::to_string(outputs.size()) + ", need " +
                             std::to_string(_meta->output_seg_num));
  }

  // Prepare input buffers: caller-owned memory handed to the driver as
  // pre-mapped buffers, sized from the metadata's segment table.
  input_buffers input;
  input.num_buffers = _meta->input_seg_num;
  for (uint32_t i = 0; i < input.num_buffers; i++)
  {
    uint32_t idx = _meta->input_seg_idx[i];
    input.bufs[i].addr = inputs[i]->buffer();
    input.bufs[i].type = BUFFER_MAPPED;
    input.bufs[i].size = _meta->segment_size[idx];
  }

  // Prepare output buffers the same way.
  output_buffers output;
  output.num_buffers = _meta->output_seg_num;
  for (uint32_t i = 0; i < output.num_buffers; i++)
  {
    uint32_t idx = _meta->output_seg_idx[i];
    output.bufs[i].addr = outputs[i]->buffer();
    output.bufs[i].type = BUFFER_MAPPED;
    output.bufs[i].size = _meta->segment_size[idx];
  }

  // Execute the model; NPU_INFER_BLOCKING waits for completion.
  int ret = runNPU_model(_dev, _model_id, NPU_INFER_BLOCKING, &input, &output, nullptr, nullptr);
  if (ret < 0)
  {
    throw std::runtime_error("runNPU_model() failed for " + _model_path +
                             ", ret: " + std::to_string(ret));
  }
}
150+
151+
void BulkPipelineModel::waitForBufferReady()
152+
{
153+
std::unique_lock<std::mutex> lock(_buffer_mutex);
154+
_buffer_cv.wait(lock, [this] { return _buffer_ready.load(); });
155+
}
156+
157+
void BulkPipelineModel::markBufferReady()
158+
{
159+
{
160+
std::lock_guard<std::mutex> lock(_buffer_mutex);
161+
_buffer_ready = true;
162+
}
163+
_buffer_cv.notify_all();
164+
}
165+
166+
bool BulkPipelineModel::loadMetadata()
167+
{
168+
_fp = fopen(_model_path.c_str(), "rb");
169+
if (!_fp)
170+
{
171+
throw std::runtime_error("Failed to open model file: " + _model_path);
172+
}
173+
174+
_meta = std::make_unique<npubin_meta>();
175+
if (fread(_meta.get(), NPUBIN_META_SIZE, 1, _fp) != 1)
176+
{
177+
throw std::runtime_error("Failed to read metadata from: " + _model_path);
178+
}
179+
180+
_meta_size = _meta->extended_metasize ? sizeof(npubin_meta) + _meta->extended_metasize
181+
: NPUBIN_META_TOTAL_SIZE(_meta->magiccode);
182+
183+
return true;
184+
}
185+
186+
void BulkPipelineModel::allocateBuffers()
187+
{
188+
if (!_meta)
189+
{
190+
throw std::runtime_error("Metadata not loaded for: " + _model_path);
191+
}
192+
193+
_program_buffer =
194+
std::make_shared<BulkPipelineBuffer>(BulkPipelineBuffer::BufferType::DMABUF_CONT,
195+
static_cast<size_t>(_meta->program_size), _device_id);
196+
197+
_weight_buffer =
198+
std::make_shared<BulkPipelineBuffer>(BulkPipelineBuffer::BufferType::DMABUF_IOMMU,
199+
static_cast<size_t>(_meta->weight_size), _device_id);
200+
201+
_program_buffer->allocate();
202+
if (_meta->weight_size > 0)
203+
{
204+
_weight_buffer->allocate();
205+
}
206+
}
207+
208+
void BulkPipelineModel::fillBuffers()
209+
{
210+
if (!_fp || !_program_buffer || !_weight_buffer)
211+
{
212+
throw std::runtime_error("Buffers not properly initialized for: " + _model_path);
213+
}
214+
215+
// Fill program buffer
216+
_program_buffer->fillFromFile(_fp, _meta_size);
217+
218+
// Fill weight buffer
219+
if (_weight_buffer->size() > 0)
220+
{
221+
_weight_buffer->fillFromFile(_fp, _meta_size + _meta->program_size);
222+
}
223+
}
224+
225+
void BulkPipelineModel::registerModel()
226+
{
227+
if (!_dev || !_program_buffer || !_weight_buffer)
228+
{
229+
throw std::runtime_error("Device or buffers not ready for: " + _model_path);
230+
}
231+
232+
generic_buffer modelfile;
233+
modelfile.type = BUFFER_FILE;
234+
modelfile.filepath = _model_path.c_str();
235+
modelfile.size = _meta->size;
236+
237+
int ret = registerNPUmodel_ext(_dev, &modelfile, _program_buffer->getGenericBuffer(),
238+
_weight_buffer->getGenericBuffer(), &_model_id);
239+
if (ret < 0)
240+
{
241+
throw std::runtime_error("Failed to register model: " + _model_path +
242+
", ret: " + std::to_string(ret));
243+
}
244+
}
245+
246+
void BulkPipelineModel::unregisterModel()
247+
{
248+
if (_dev && _model_id > 0)
249+
{
250+
int ret = unregisterNPUmodel(_dev, _model_id);
251+
if (ret < 0)
252+
{
253+
std::cerr << "Failed to unregister model: " << _model_path << ", ret: " << ret << std::endl;
254+
}
255+
_model_id = 0;
256+
}
257+
}
258+
259+
void BulkPipelineModel::openDevice()
260+
{
261+
int ret = getNPUdeviceByType(&_dev, NPUCOND_TRIV24_CONN_SOCIP, _device_id);
262+
if (ret < 0)
263+
{
264+
throw std::runtime_error("Failed to open NPU device for: " + _model_path +
265+
", ret: " + std::to_string(ret));
266+
}
267+
}
268+
269+
void BulkPipelineModel::closeDevice()
270+
{
271+
if (_dev)
272+
{
273+
putNPUdevice(_dev);
274+
_dev = nullptr;
275+
}
276+
}
277+
278+
} // namespace ops
279+
} // namespace trix
280+
} // namespace backend
281+
} // namespace onert
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/*
2+
* Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#ifndef __ONERT_BACKEND_TRIX_OPS_BULKPIPELINEMODEL_H__
18+
#define __ONERT_BACKEND_TRIX_OPS_BULKPIPELINEMODEL_H__
19+
20+
#include <memory>
21+
#include <string>
22+
#include <vector>
23+
#include <mutex>
24+
#include <condition_variable>
25+
#include <future>
26+
#include <atomic>
27+
#include <backend/IPortableTensor.h>
28+
#include <libnpuhost.h>
29+
30+
#include "BulkPipelineBuffer.h"
31+
32+
namespace onert
33+
{
34+
namespace backend
35+
{
36+
namespace trix
37+
{
38+
namespace ops
39+
{
40+
41+
class BulkPipelineModel
42+
{
43+
public:
44+
BulkPipelineModel(const std::string &model_path, int device_id);
45+
~BulkPipelineModel();
46+
47+
// Disallow copying
48+
BulkPipelineModel(const BulkPipelineModel &) = delete;
49+
BulkPipelineModel &operator=(const BulkPipelineModel &) = delete;
50+
51+
bool initialize();
52+
bool prepare();
53+
void release();
54+
bool isPrepared() const { return _prepared; }
55+
56+
void run(const std::vector<const IPortableTensor *> &inputs,
57+
std::vector<IPortableTensor *> &outputs);
58+
59+
void waitForBufferReady();
60+
void markBufferReady();
61+
62+
const npubin_meta *metadata() const { return _meta.get(); }
63+
uint64_t programSize() const { return _meta->program_size; }
64+
uint64_t weightSize() const { return _meta->weight_size; }
65+
uint32_t modelId() const { return _model_id; }
66+
npudev_h device() const { return _dev; }
67+
const std::string &modelPath() const { return _model_path; }
68+
69+
private:
70+
bool loadMetadata();
71+
void allocateBuffers();
72+
void fillBuffers();
73+
void registerModel();
74+
void unregisterModel();
75+
void openDevice();
76+
void closeDevice();
77+
78+
private:
79+
std::string _model_path;
80+
int _device_id;
81+
std::atomic<bool> _initialized{false};
82+
std::atomic<bool> _prepared{false};
83+
84+
npudev_h _dev;
85+
uint32_t _model_id{0};
86+
87+
std::unique_ptr<npubin_meta> _meta;
88+
size_t _meta_size{0};
89+
FILE *_fp{nullptr};
90+
91+
std::shared_ptr<BulkPipelineBuffer> _program_buffer;
92+
std::shared_ptr<BulkPipelineBuffer> _weight_buffer;
93+
94+
std::mutex _buffer_mutex;
95+
std::condition_variable _buffer_cv;
96+
std::atomic<bool> _buffer_ready{false};
97+
};
98+
99+
} // namespace ops
100+
} // namespace trix
101+
} // namespace backend
102+
} // namespace onert
103+
104+
#endif // __ONERT_BACKEND_TRIX_OPS_BULKPIPELINEMODEL_H__

0 commit comments

Comments
 (0)