ONE - On-device Neural Engine
Conv.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef __NNFW_CKER_CONV_H__
19#define __NNFW_CKER_CONV_H__
20
21#include "cker/Types.h"
22#include "cker/Shape.h"
23#include "cker/Utils.h"
24#include "cker/operation/reference/Conv.h"
25#include "cker/operation/optimized/Conv.h"
26#include <iostream>
27#include <vector>
28
29namespace nnfw
30{
31namespace cker
32{
33
34namespace
35{
36// Naive implementation of transpose for floats. Could be optimized to be more
37// cache friendly, but for now it's a one-time cost on first run, and we would
38// prefer to remove the need to do this at all eventually.
39inline void TransposeFloatTensor(const float *input_data, const nnfw::cker::Shape &output_shape,
40 float *output_data)
41{
42 const int rows = output_shape.Dims(1);
43 const int cols = output_shape.Dims(0);
44 for (int i = 0; i < rows; ++i)
45 {
46 for (int j = 0; j < cols; ++j)
47 {
48 const float in_value = input_data[i * cols + j];
49 output_data[j * rows + i] = in_value;
50 }
51 }
52}
53} // namespace
54
55class Conv
56{
57public:
58 Conv() : _modified_filter_data(), _im2col_shape(4), _need_im2col(false), _prepared(false) {}
59
60 void prepareF32(const Shape &filter_shape, const float *filter_data, PaddingType padding_type,
61 bool &is_replaced_weights, uint32_t dilationWidthFactor,
62 uint32_t dilationHeightFactor)
63 {
64 if (!_prepared)
65 {
66 if (usableMultiThreaded(padding_type, dilationWidthFactor, dilationHeightFactor))
67 {
68 transposeFilter(filter_shape, filter_data, is_replaced_weights);
69 }
70 _prepared = true;
71 }
72 }
73
74 void prepareQ8uPerTensor(const Shape &input_shape, const Shape &kernel_shape,
75 const Shape &output_shape, uint32_t stride_width, uint32_t stride_height,
76 uint32_t dilation_width_factor, uint32_t dilation_height_factor)
77 {
78 if (!_prepared)
79 {
80 IsRequiredIm2col(input_shape, kernel_shape, output_shape, stride_width, stride_height,
81 dilation_width_factor, dilation_height_factor);
82 _prepared = true;
83 }
84 }
85
86 void operator()(const ConvParams &params, const Shape &input_shape, const float *input_data,
87 const Shape &filter_shape, const float *filter_data, const Shape &bias_shape,
88 const float *bias_data, const Shape &output_shape, float *output_data)
89 {
90 if (usableMultiThreaded(params.padding_type, params.dilation_width_factor,
91 params.dilation_height_factor))
92 {
93 bool transposed_in_execution = false;
94 if (!_prepared)
95 {
96 // This means that filter is not constant
97 // TODO Apply optimized kernel if multithreaded kernel is slower than optimized kernel by
98 // transposing filter data
99 transposeFilter(filter_shape, filter_data, transposed_in_execution);
100 }
101 multithreaded::Conv(params, input_shape, input_data, filter_shape, &_modified_filter_data[0],
102 bias_shape, bias_data, output_shape, output_data);
103 }
104 else
105 {
106 // TODO Support optimized kernel
107 reference::Conv(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
108 bias_data, output_shape, output_data);
109 }
110 }
111
112 void operator()(const ConvParams &params, const Shape &input_shape, const uint8_t *input_data,
113 const Shape &filter_shape, const uint8_t *filter_data, const Shape &bias_shape,
114 const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data)
115 {
116 if (!_prepared)
117 {
118 // This means that input or output are dynamic or filter is not constant
119 IsRequiredIm2col(input_shape, filter_shape, output_shape, params.stride_width,
120 params.stride_height, params.dilation_width_factor,
121 params.dilation_height_factor);
122 }
123
124 int im2col_size = _need_im2col ? _im2col_shape.FlatSize() : 1;
125 std::vector<uint8_t> im2col_data(im2col_size);
126 optimized::Conv(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
127 bias_data, output_shape, output_data, _im2col_shape, im2col_data.data());
128 }
129
130 void operator()(const ConvParams &params, const Shape &input_shape, const uint8_t *input_data,
131 const Shape &filter_shape, const uint8_t *filter_data,
132 const int32_t *filter_zero_point, const Shape &bias_shape,
133 const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data)
134 {
135 reference::Conv<uint8_t, true>(params, _per_channel_output_multiplier.data(),
136 _per_channel_output_shift.data(), input_shape, input_data,
137 filter_shape, filter_data, filter_zero_point, bias_shape,
138 bias_data, output_shape, output_data);
139 }
140
141 void operator()(const ConvParams &params, const Shape &input_shape, const int8_t *input_data,
142 const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape,
143 const int32_t *bias_data, const Shape &output_shape, int8_t *output_data)
144 {
145 reference::Conv<int8_t, false>(params, _per_channel_output_multiplier.data(),
146 _per_channel_output_shift.data(), input_shape, input_data,
147 filter_shape, filter_data, nullptr /* filter_zero_point */,
148 bias_shape, bias_data, output_shape, output_data);
149 }
150 std::vector<int32_t> &per_channel_output_multiplier() { return _per_channel_output_multiplier; }
151 std::vector<int> &per_channel_output_shift() { return _per_channel_output_shift; }
152
153private:
154 bool usableMultiThreaded(PaddingType padding_type, uint32_t dilation_width_factor,
155 int32_t dilation_height_factor)
156 {
157 return padding_type != PaddingType::kNone && std::thread::hardware_concurrency() > 1 &&
158 dilation_width_factor == 1 && dilation_height_factor == 1;
159 }
160
161 void transposeFilter(const Shape &filter_shape, const float *filter_data,
162 bool &is_replaced_weights)
163 {
164 const auto output_depth = filter_shape.Dims(0);
165 const Shape hwcn_filter_shape{filter_shape.FlatSize() / output_depth, output_depth};
166 _modified_filter_data.resize(hwcn_filter_shape.FlatSize());
167 TransposeFloatTensor(filter_data, hwcn_filter_shape, &_modified_filter_data[0]);
168 is_replaced_weights = true;
169 }
170
171 void IsRequiredIm2col(const Shape &input_shape, const Shape &kernel_shape,
172 const Shape &output_shape, uint32_t stride_width, uint32_t stride_height,
173 uint32_t dilation_width_factor, uint32_t dilation_height_factor)
174 {
175 const bool need_dilated_im2col = dilation_width_factor != 1 || dilation_height_factor != 1;
176 const bool need_non_dilated_im2col = stride_width != 1 || stride_height != 1 ||
177 kernel_shape.Dims(1) != 1 || kernel_shape.Dims(2) != 1;
178
179 _need_im2col = need_dilated_im2col || need_non_dilated_im2col;
180
181 if (_need_im2col)
182 {
183 _im2col_shape.SetDim(0, output_shape.Dims(0));
184 _im2col_shape.SetDim(1, output_shape.Dims(1));
185 _im2col_shape.SetDim(2, output_shape.Dims(2));
186 _im2col_shape.SetDim(3, input_shape.Dims(3) * kernel_shape.Dims(1) * kernel_shape.Dims(2));
187 }
188 }
189
190private:
191 std::vector<float> _modified_filter_data;
192 Shape _im2col_shape;
193 bool _need_im2col;
194 bool _prepared;
195 // Per channel output multiplier and shift.
196 std::vector<int32_t> _per_channel_output_multiplier;
197 std::vector<int> _per_channel_output_shift;
198};
199
200struct ConvHybridTempArena
201{
202 ConvHybridTempArena(int batch_size, int input_size)
203 {
204 input_quantized.resize(input_size);
205 // TODO: Optimize the case of batch_size = 1
206 input_scaling_factors.resize(batch_size);
207 input_offsets.resize(batch_size);
208 }
209 std::vector<int8_t> input_quantized;
210 std::vector<float> input_scaling_factors;
211 std::vector<int32_t> input_offsets;
212};
213
214} // namespace cker
215} // namespace nnfw
216
217#endif // __NNFW_CKER_CONV_H__
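
Usage note: the listing above shows only the Conv wrapper itself. Below is a minimal, hypothetical driver for the float path, not taken from the repository. The include path, the PaddingType::kValid enumerator, and the ConvParams activation-range fields (float_activation_min/float_activation_max) are assumptions based on the cker Types.h this header relies on; shapes and values are invented for illustration. The calling pattern it sketches is the one implied by prepareF32 and the float operator(): prepare once for constant weights, then invoke per execution.

#include <limits>
#include <vector>

#include "cker/operation/Conv.h" // assumed path of the header listed above

void RunFloatConvOnce()
{
  using namespace nnfw::cker;

  // NHWC input (1x8x8x3), OHWI filter (4x3x3x3), VALID padding -> 1x6x6x4 output.
  const Shape input_shape{1, 8, 8, 3};
  const Shape filter_shape{4, 3, 3, 3};
  const Shape bias_shape{4};
  const Shape output_shape{1, 6, 6, 4};

  std::vector<float> input(input_shape.FlatSize(), 0.1f);
  std::vector<float> filter(filter_shape.FlatSize(), 0.01f);
  std::vector<float> bias(bias_shape.FlatSize(), 0.0f);
  std::vector<float> output(output_shape.FlatSize(), 0.0f);

  ConvParams params{};
  params.padding_type = PaddingType::kValid; // assumed enumerator; only kNone appears in the listing
  params.stride_width = 1;
  params.stride_height = 1;
  params.dilation_width_factor = 1;
  params.dilation_height_factor = 1;
  // Assumed activation-range fields; left value-initialized they would clamp
  // every output to zero in the reference path.
  params.float_activation_min = std::numeric_limits<float>::lowest();
  params.float_activation_max = std::numeric_limits<float>::max();

  Conv conv;

  // Constant weights: prepare once. When the multithreaded path is usable
  // (padding set, no dilation, more than one hardware thread), this transposes
  // the filter into the HWCN copy kept in _modified_filter_data and reports
  // that the weights were replaced.
  bool filter_replaced = false;
  conv.prepareF32(filter_shape, filter.data(), params.padding_type, filter_replaced,
                  params.dilation_width_factor, params.dilation_height_factor);

  // Per-execution call: dispatches to multithreaded::Conv when usable,
  // otherwise falls back to reference::Conv with the original filter data.
  conv(params, input_shape, input.data(), filter_shape, filter.data(), bias_shape, bias.data(),
       output_shape, output.data());
}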