ONE - On-device Neural Engine
onert::backend::builtin::kernel::PermuteLayer Class Reference

#include <PermuteLayer.h>

Collaboration diagram for onert::backend::builtin::kernel::PermuteLayer:

Public Member Functions

 PermuteLayer (const std::vector< ITensor * > &src_tensors, const std::vector< ITensor * > &dst_tensors, const std::vector< ir::PermuteType > &types, const std::shared_ptr< ExternalContext > &external_context)
 
void optimize () override
 
void run () override
 
- Public Member Functions inherited from onert::exec::IPermuteFunction
virtual void prepare () override
 
- Public Member Functions inherited from onert::exec::IFunction
virtual ~IFunction ()=default
 

Additional Inherited Members

- Protected Member Functions inherited from onert::exec::IPermuteFunction
void permute (backend::ITensor *src_tensor, backend::ITensor *dst_tensor, size_t rank, std::vector< size_t > &src_offsets, std::vector< size_t > &dst_offsets, const ir::PermuteType &permute_type)
 
const std::type_info & underlying_type (ir::DataType type) const
 
- Protected Attributes inherited from onert::exec::IPermuteFunction
std::vector< backend::ITensor * > _src_tensors
 
std::vector< backend::ITensor * > _dst_tensors
 
std::vector< std::vector< size_t > > _src_tensors_offsets
 
std::vector< std::vector< size_t > > _dst_tensors_offsets
 
std::vector< ir::PermuteType > _permute_types
 
std::unordered_map< const backend::ITensor *, std::vector< uint8_t > > _buffers_map
 

Detailed Description

Definition at line 28 of file PermuteLayer.h.
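
PermuteLayer is the builtin backend's kernel for copying a set of source tensors into destination tensors, optionally converting the layout (ir::PermuteType::COPY, NHWC_TO_NCHW or NCHW_TO_NHWC). The sketch below is a minimal, hypothetical usage example rather than code taken from onert: it assumes the caller already owns allocated source/destination tensors and the builtin backend's ExternalContext, and it only illustrates the documented call order (construct, then optimize(), then run()).

#include <memory>
#include <vector>

#include <PermuteLayer.h> // onert::backend::builtin::kernel::PermuteLayer

// Hypothetical helper (not part of onert): wires one source/destination pair into a
// PermuteLayer and executes it. Tensors and the external context are assumed to come
// from the backend (e.g. its tensor registry) and to be allocated already.
void runSinglePermute(onert::backend::ITensor *src, onert::backend::ITensor *dst,
                      onert::ir::PermuteType type,
                      const std::shared_ptr<onert::backend::builtin::ExternalContext> &ctx)
{
  std::vector<onert::backend::ITensor *> srcs{src};
  std::vector<onert::backend::ITensor *> dsts{dst};
  std::vector<onert::ir::PermuteType> types{type};

  onert::backend::builtin::kernel::PermuteLayer layer{srcs, dsts, types, ctx};
  layer.optimize(); // pre-builds copy/permute worker tasks for static-shaped tensors
  layer.run();      // copies src into dst, permuting the layout if requested
}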

Constructor & Destructor Documentation

◆ PermuteLayer()

onert::backend::builtin::kernel::PermuteLayer::PermuteLayer ( const std::vector< ITensor * > &  src_tensors,
const std::vector< ITensor * > &  dst_tensors,
const std::vector< ir::PermuteType > &  types,
const std::shared_ptr< ExternalContext > &  external_context 
)

Definition at line 24 of file PermuteLayer.cc.

28 : _external_context{external_context}, _tasks_map{}
29{
30 assert(src_tensors.size() == dst_tensors.size());
31 assert(src_tensors.size() == types.size());
32 _src_tensors = src_tensors;
33 _dst_tensors = dst_tensors;
34 _permute_types = types;
35 _src_tensors_offsets.resize(src_tensors.size());
36 _dst_tensors_offsets.resize(dst_tensors.size());
37 _permute_types.resize(src_tensors.size());
38}

References onert::exec::IPermuteFunction::_dst_tensors, onert::exec::IPermuteFunction::_dst_tensors_offsets, onert::exec::IPermuteFunction::_permute_types, onert::exec::IPermuteFunction::_src_tensors, and onert::exec::IPermuteFunction::_src_tensors_offsets.

Member Function Documentation

◆ optimize()

void onert::backend::builtin::kernel::PermuteLayer::optimize ( )
overridevirtual

Implements onert::exec::IPermuteFunction.

Reimplemented in onert::backend::builtin::train::kernel::PermuteLayer.

Definition at line 40 of file PermuteLayer.cc.

41{
42 // Remove copy pairs where src == dst or either tensor is nullptr
43 auto src_it = _src_tensors.begin();
44 auto dst_it = _dst_tensors.begin();
45 auto src_offsets_it = _src_tensors_offsets.begin();
46 auto dst_offsets_it = _dst_tensors_offsets.begin();
47 auto type_it = _permute_types.begin();
48 while (src_it != _src_tensors.end())
49 {
50 if ((*src_it == *dst_it) || (*src_it == nullptr || *dst_it == nullptr))
51 {
52 src_it = _src_tensors.erase(src_it);
53 dst_it = _dst_tensors.erase(dst_it);
54 src_offsets_it = _src_tensors_offsets.erase(src_offsets_it);
55 dst_offsets_it = _dst_tensors_offsets.erase(dst_offsets_it);
56 type_it = _permute_types.erase(type_it);
57 }
58 else
59 {
60 auto src = *src_it;
61 auto dst = *dst_it;
62 src_offsets_it->resize(0);
63 dst_offsets_it->resize(0);
64 if (underlying_type(src->data_type()) != underlying_type(dst->data_type()))
65 continue;
66 const auto permute_type = *type_it;
67
68 // TODO Support different types
69 auto fn = [&](backend::ITensor &src_tensor) {
70 dst->access([&](backend::ITensor &dst_tensor) {
71 // NOTE The buffers of both tensors can be nullptr in this step
72 const auto data_size = ir::sizeOfDataType(src_tensor.data_type());
73
74 if (permute_type == ir::PermuteType::COPY)
75 {
76 if ((!src_tensor.has_padding() && !dst_tensor.has_padding()))
77 {
78 const auto num_elements = src_tensor.getShape().num_elements();
79 const int thread_count =
80 _external_context->ruy_context()->max_num_threads() < static_cast<int>(num_elements)
81 ? _external_context->ruy_context()->max_num_threads()
82 : num_elements;
83
84 std::vector<PermuteWorkerTask> tasks;
85 auto start = 0;
86 for (auto i = 0; i < thread_count; ++i)
87 {
88 int end = start + (num_elements - start) / (thread_count - i);
89 tasks.emplace_back(src_tensor.buffer(), dst_tensor.buffer(), start * data_size,
90 start * data_size, (end - start) * data_size);
91 start = end;
92 }
93 assert(tasks.size() >= 1);
94 _tasks_map[src] = std::move(tasks);
95 }
96 else
97 {
98 auto loop_shape = src_tensor.getShape();
99
100 auto copy_axis = loop_shape.rank() - 1;
101 copy_axis = copy_axis < 0 ? 1 : copy_axis;
102 const auto copy_len = loop_shape.dim(copy_axis) * data_size;
103 loop_shape.dim(copy_axis) = 1;
104
105 appendPermuteTasks(src, dst, loop_shape, copy_len, permute_type);
106 }
107 }
108 else
109 {
110 assert(src_tensor.getShape().rank() == 4 &&
111 (permute_type == ir::PermuteType::NHWC_TO_NCHW ||
112 permute_type == ir::PermuteType::NCHW_TO_NHWC));
113 const auto loop_shape = src_tensor.getShape();
114 const auto copy_len = data_size;
115
116 appendPermuteTasks(src, dst, loop_shape, copy_len, permute_type);
117 }
118 });
119 };
120 src->access(fn);
121 src_it++;
122 dst_it++;
123 src_offsets_it++;
124 dst_offsets_it++;
125 type_it++;
126 }
127 }
128}

References onert::exec::IPermuteFunction::_dst_tensors, onert::exec::IPermuteFunction::_dst_tensors_offsets, onert::exec::IPermuteFunction::_permute_types, onert::exec::IPermuteFunction::_src_tensors, onert::exec::IPermuteFunction::_src_tensors_offsets, onert::ir::COPY, dst_tensor, onert::ir::NCHW_TO_NHWC, onert::ir::NHWC_TO_NCHW, onert::ir::sizeOfDataType(), src_tensor, and onert::exec::IPermuteFunction::underlying_type().

Referenced by onert::backend::builtin::train::kernel::PermuteLayer::optimize().
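
For the COPY case without padding, optimize() above splits the flat element range across up to ruy's max_num_threads() worker tasks. The following standalone sketch (illustrative names, not onert API) reproduces that chunking formula: each iteration hands out an equal share of the remaining elements, so [0, num_elements) is tiled by thread_count contiguous chunks whose sizes differ by at most one element.

#include <cstdio>
#include <utility>
#include <vector>

// Split [0, num_elements) into thread_count {start, length} chunks, mirroring the
// loop in optimize() that builds one PermuteWorkerTask per chunk.
std::vector<std::pair<int, int>> splitIntoChunks(int num_elements, int thread_count)
{
  std::vector<std::pair<int, int>> chunks;
  int start = 0;
  for (int i = 0; i < thread_count; ++i)
  {
    // Same formula as in optimize(): divide what is left evenly among the remaining workers.
    const int end = start + (num_elements - start) / (thread_count - i);
    chunks.emplace_back(start, end - start);
    start = end;
  }
  return chunks;
}

int main()
{
  // 10 elements over 3 workers -> chunk lengths 3, 3 and 4, covering all elements.
  for (const auto &chunk : splitIntoChunks(10, 3))
    std::printf("start=%d len=%d\n", chunk.first, chunk.second);
  return 0;
}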

◆ run()

void onert::backend::builtin::kernel::PermuteLayer::run ( )
overridevirtual

Reimplemented from onert::exec::IPermuteFunction.

Definition at line 182 of file PermuteLayer.cc.

183{
184 assert(_src_tensors.size() == _dst_tensors.size());
185 // PermuteLayer infers dynamic shape inside itself whenever run is called for the following
186 // reasons:
187 // 1. PermuteLayer has to access dynamic tensor manager for input/output tensors of other backends
188 // 2. Other controlflow operations (If/While) use this layer for copying tensors of other
189 // subgraphs (with other backends)
190 // 3. This inferring code is placed here to avoid code duplication caused by the above 2
191 // reasons
192
193 // check if output is not dynamic
194 for (size_t i = 0; i < _src_tensors.size(); ++i)
195 {
196 auto dst_tensor = _dst_tensors.at(i);
197 auto src_tensor = _src_tensors.at(i);
198 auto permute_type = _permute_types.at(i);
199 if (src_tensor->is_dynamic() || dst_tensor->is_dynamic())
200 {
201 // getting output shape
202 auto src_shape = src_tensor->getShape();
203
204 // set output shape and output buffer
205 ir::Shape new_shape = ir::convertShape(src_shape, permute_type);
206
207 try
208 {
209 if (!dst_tensor->applyShape(new_shape))
210 throw std::runtime_error{
211 "Error: PermuteLayer: output's TensorManager does not support dynamic tensor"};
212 assert(dst_tensor->buffer() != nullptr);
213 }
214 catch (const std::out_of_range &e)
215 {
216 std::cerr << "Error: out_of_range in PermuteLayer: output's TensorManager does not support "
217 "dynamic tensor"
218 << '\n';
219 throw;
220 }
221 }
222 assert(ir::convertShape(src_tensor->getShape(), permute_type) == dst_tensor->getShape());
223 }
224 assert(_src_tensors.size() == _dst_tensors.size());
225 assert(_src_tensors.size() == _src_tensors_offsets.size());
226 assert(_dst_tensors.size() == _dst_tensors_offsets.size());
227 auto src_it = _src_tensors.begin();
228 auto dst_it = _dst_tensors.begin();
229 auto src_offsets_it = _src_tensors_offsets.begin();
230 auto dst_offsets_it = _dst_tensors_offsets.begin();
231 auto type_it = _permute_types.begin();
232 while (src_it != _src_tensors.end())
233 {
234 auto src = *src_it;
235 auto dst = *dst_it;
236 auto &src_offsets = *src_offsets_it;
237 auto &dst_offsets = *dst_offsets_it;
238 auto permute_type = *type_it;
239
240 if (src->total_size() == 0)
241 {
242 assert(dst->total_size() == 0);
243 }
244 else
245 {
246 if (src != dst)
247 {
248 // Conditions to run permutation with multithreading
249 // 1. The tasks for multithreading were created
250 // 2. The tasks' size > 1
251 // 3. Both tensors are not dynamic
252 // 4. Underlying data types of both tensors are the same
253 if (_tasks_map.find(src) == _tasks_map.end() || _tasks_map.at(src).size() == 1 ||
254 src->is_dynamic() || dst->is_dynamic() ||
255 underlying_type(src->data_type()) != underlying_type(dst->data_type()))
256 {
257 permute(src, dst, src->getShape().rank(), src_offsets, dst_offsets, permute_type);
258 }
259 // If dst is a subtensor, we have to use clEnqueueMapBuffer instead of clEnqueueWriteBuffer
260 else if (dst->needMemoryMap() && !dst->is_subtensor())
261 {
262 if (!src->has_padding() && !dst->has_padding() && permute_type == ir::PermuteType::COPY)
263 {
264 // This is more effective than multi-threading
265 src->access([&](backend::ITensor &) { dst->enqueueWriteBuffer(src->buffer(), false); });
266 }
267 else
268 {
269 // TODO Optimize this block in case the padding size of dst is big.
270 _buffers_map[dst].reserve(dst->total_size());
271 auto dst_buffer = _buffers_map[dst].data();
272
273 src->access([&](backend::ITensor &) { runPermuteTasks(src, dst_buffer); });
274 dst->enqueueWriteBuffer(dst_buffer, false);
275 }
276 }
277 else if (src->needMemoryMap() && !src->is_subtensor() && !src->has_padding() &&
278 !dst->has_padding() && permute_type == ir::PermuteType::COPY)
279 {
280 // This is more effective than multi-threading
281 assert(!dst->needMemoryMap());
282 dst->access([&](backend::ITensor &) { src->enqueueReadBuffer(dst->buffer(), true); });
283 }
284 else
285 {
286 auto fn = [&](backend::ITensor &) {
287 dst->access([&](backend::ITensor &) { runPermuteTasks(src, dst->buffer()); });
288 };
289 src->access(fn);
290 }
291 }
292 }
293 src_it++;
294 dst_it++;
295 src_offsets_it++;
296 dst_offsets_it++;
297 type_it++;
298 }
299}

References onert::exec::IPermuteFunction::_buffers_map, onert::exec::IPermuteFunction::_dst_tensors, onert::exec::IPermuteFunction::_dst_tensors_offsets, onert::exec::IPermuteFunction::_permute_types, onert::exec::IPermuteFunction::_src_tensors, onert::exec::IPermuteFunction::_src_tensors_offsets, onert::ir::convertShape(), onert::ir::COPY, dst_tensor, onert::exec::IPermuteFunction::permute(), src_tensor, and onert::exec::IPermuteFunction::underlying_type().

Referenced by onert::backend::builtin::train::kernel::PermuteLayer::forward().
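
When either tensor is dynamic, run() first derives the destination shape with ir::convertShape() before applying it via applyShape(). The standalone sketch below is not onert's implementation; it only illustrates the rank-4 reindexing implied by the NHWC_TO_NCHW and NCHW_TO_NHWC permute types, while COPY leaves the shape unchanged.

#include <array>
#include <cassert>
#include <cstdio>

// Illustrative stand-ins; onert's ir::PermuteType and ir::convertShape() are not used here.
enum class PermuteType { COPY, NHWC_TO_NCHW, NCHW_TO_NHWC };

std::array<int, 4> convertShape4(const std::array<int, 4> &shape, PermuteType type)
{
  switch (type)
  {
    case PermuteType::NHWC_TO_NCHW: // [N, H, W, C] -> [N, C, H, W]
      return {shape[0], shape[3], shape[1], shape[2]};
    case PermuteType::NCHW_TO_NHWC: // [N, C, H, W] -> [N, H, W, C]
      return {shape[0], shape[2], shape[3], shape[1]};
    default: // COPY: layout and shape stay the same
      return shape;
  }
}

int main()
{
  const std::array<int, 4> nhwc{1, 224, 224, 3};
  const auto nchw = convertShape4(nhwc, PermuteType::NHWC_TO_NCHW);
  assert((nchw == std::array<int, 4>{1, 3, 224, 224}));
  std::printf("NCHW: %d %d %d %d\n", nchw[0], nchw[1], nchw[2], nchw[3]);
  return 0;
}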


The documentation for this class was generated from the following files:

PermuteLayer.h
PermuteLayer.cc