ONE - On-device Neural Engine
Loading...
Searching...
No Matches
DepthwiseConvolutionLayer.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
18
19#include "ir/Padding.h"
20
21namespace onert
22{
23namespace backend
24{
25namespace xnnpack
26{
27namespace ops
28{
29
31 const std::shared_ptr<ExternalContext> external_context) // passed straight on to the Layer base class
32 : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
33 _padding_type(ir::PaddingType::EXPLICIT), _padding_left(0), _padding_top(0), _padding_right(0),
34 _padding_bottom(0), _stride_width(0), _stride_height(0), _multiplier(1),
35 _dilation_width_factor(1), _dilation_height_factor(1), _activation(ir::Activation::NONE)
36{
37 // Nothing to do: tensors start as nullptr, padding/stride as 0, multiplier/dilation as 1 and
37 // activation as NONE via the initializer list; configure() assigns the real values.
38}
39
41 const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias,
42 ir::PaddingType padding_type, const uint32_t padding_left, const uint32_t padding_right,
43 const uint32_t padding_top, const uint32_t padding_bottom, const uint32_t stride_width,
44 const uint32_t stride_height, const uint32_t multiplier, const uint32_t dilation_width_factor,
45 const uint32_t dilation_height_factor, const ir::Activation activation, IPortableTensor *output)
46{ // Records the operation parameters in the members; no XNNPACK call is made here.
47 _input = input;
48 _kernel = kernel;
49 _bias = bias;
50 _padding_type = padding_type;
51 _padding_left = padding_left; // note the parameter order: left, right, top, bottom
52 _padding_right = padding_right;
53 _padding_top = padding_top;
54 _padding_bottom = padding_bottom;
55 _stride_width = stride_width;
56 _stride_height = stride_height;
57 _multiplier = multiplier;
58 _dilation_width_factor = dilation_width_factor;
59 _dilation_height_factor = dilation_height_factor;
60 _activation = activation;
61 _output = output;
62
62 // Only NONE, RELU, RELU1 and RELU6 are accepted. This is an assert, so it is the only check
62 // made here and it disappears when NDEBUG is defined.
63 assert(_activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
64 _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6);
65}
66
68{ // Executes the operator held in _kernel_op on the external context's thread pool.
69 assert(_external_context && _external_context->getThreadPool());
70 if (!_setup) // setup() has not succeeded yet, so try it now
71 {
72 _setup = setup();
73 assert(_setup); // NOTE(review): assert-only check; with NDEBUG a false result falls through to the run below
74 }
75
76 if (_input->data_type() == OperandType::FLOAT32) // FLOAT32 is the only input type handled
77 {
78 enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
79 if (status != xnn_status_success)
80 {
81 throw std::runtime_error{"failed to run FP32 DepthwiseConvolution operator"};
82 }
83 }
84 else
85 {
86 throw std::runtime_error{"XNNPACK DepthwiseConv: unsupported data type"};
87 }
88}
89
91{ // Creates the XNNPACK FP32 NHWC convolution operator and stores it in _kernel_op.
92 float output_activation_min = 0.f, output_activation_max = 0.f;
93 CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);
94
95 // NHWC
96 // Kernel format is [1, kernel_height, kernel_width, depth_out].
97 const auto &kernel_shape = _kernel->getShape();
98 uint32_t kernel_height = kernel_shape.dim(1);
99 uint32_t kernel_width = kernel_shape.dim(2);
100 uint32_t output_channels = kernel_shape.dim(3);
101 uint32_t input_channels = _input->getShape().dim(3);
102 assert(static_cast<uint32_t>(_output->getShape().dim(3)) == output_channels);
103 assert(output_channels == input_channels * _multiplier);
104
104 // The depthwise case is passed as a grouped convolution: one group per input channel, one
104 // input channel per group and _multiplier output channels per group.
104 // NOTE(review): _bias is dereferenced unconditionally; presumably a bias tensor is always
104 // supplied by the caller - confirm.
105 enum xnn_status status = xnn_create_convolution2d_nhwc_f32(
106 _padding_top, _padding_right, _padding_bottom, _padding_left, kernel_height, kernel_width,
107 _stride_height, _stride_width, _dilation_height_factor, _dilation_width_factor,
108 input_channels /* groups */, 1 /* group_input_channels */,
109 _multiplier /* group_output_channels */, input_channels /* input_channel_stride */,
110 output_channels /* output_channel_stride */, reinterpret_cast<const float *>(_kernel->buffer()),
111 reinterpret_cast<const float *>(_bias->buffer()), output_activation_min, output_activation_max,
112 XNN_FLAG_DEPTHWISE_CONVOLUTION, nullptr, nullptr, &_kernel_op);
113 if (status != xnn_status_success)
114 {
115 throw std::runtime_error{"failed to create FP32 DepthwiseConvolution operator"};
116 }
117 assert(_kernel_op != nullptr);
118 return true; // failure is reported by the exception above, never by a false return
119}
120
122{ // Reshapes _kernel_op for the current input shape and binds the input/output buffers.
123 if (_input->buffer() == nullptr || _output->buffer() == nullptr)
124 {
125 // it could be the model's input or output, whose buffer is not available yet
126 return false;
127 }
128
129 uint32_t input_width = _input->getShape().dim(2);
130 uint32_t input_height = _input->getShape().dim(1);
131 uint32_t batch_size = _input->getShape().dim(0);
132 size_t workspace_size = 0;
133 size_t workspace_alignment = 0;
134 enum xnn_status status = xnn_reshape_convolution2d_nhwc_f32(
135 _kernel_op, batch_size, input_height, input_width, &workspace_size, &workspace_alignment,
136 nullptr, nullptr, _external_context->getThreadPool());
137 if (status != xnn_status_success)
138 {
139 throw std::runtime_error{"failed to reshape FP32 DepthwiseConvolution operator"};
140 }
141
141 // NOTE(review): this workspace is a local that is destroyed on return, and workspace_alignment
141 // is not applied to it. If XNNPACK keeps the pointer for xnn_run_operator, it must outlive this
141 // call (e.g. as a member) - confirm against the XNNPACK API.
142 std::vector<uint8_t> workspace(workspace_size);
143 status = xnn_setup_convolution2d_nhwc_f32(_kernel_op, workspace.data(),
144 reinterpret_cast<const float *>(_input->buffer()),
145 reinterpret_cast<float *>(_output->buffer()));
146 if (status != xnn_status_success)
147 {
148 throw std::runtime_error{"failed to setup FP32 DepthwiseConvolution operator"};
149 }
150 return true;
151}
152
153} // namespace ops
154} // namespace xnnpack
155} // namespace backend
156} // namespace onert
A tensor class that is portable for other backends.
ir::DataType data_type() const override final
ir::Shape getShape() const override final
Get ir::Shape of tensor.
virtual uint8_t * buffer() const =0
void configure(const IPortableTensor *input, const IPortableTensor *kernel, const IPortableTensor *bias, ir::PaddingType padding_type, const uint32_t padding_left, const uint32_t padding_right, const uint32_t padding_top, const uint32_t padding_bottom, const uint32_t stride_width, const uint32_t stride_height, const uint32_t multiplier, const uint32_t dilation_width_factor, const uint32_t dilation_height_factor, const ir::Activation activation, IPortableTensor *output)
DepthwiseConvolutionLayer(const std::shared_ptr< ExternalContext > external_context)
const std::shared_ptr< ExternalContext > _external_context
Definition Layer.h:73