ONE - On-device Neural Engine
Loading...
Searching...
No Matches
TransposeConv.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
#include "kernels/TransposeConv.h"
#include "kernels/Utils.h"

#include <tensorflow/lite/kernels/internal/reference/transpose_conv.h>

#include <cassert>   // assert
#include <cstdint>   // fixed-width integer types (uint8_t, int16_t, ...)
#include <cstring>   // std::memset
#include <limits>    // std::numeric_limits
#include <stdexcept> // std::runtime_error
26
27namespace luci_interpreter
28{
29
30namespace kernels
31{
32
33TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
34 const Tensor *bias, Tensor *output, Tensor *scratch_tensor,
35 const TransposeConvParams &params)
37 {output, scratch_tensor}, params)
38{
39}
40
42{
43 // Define destructor here, to delete vector of qunatized multipliers properly
44}
45
47{
48 assert(output_shape()->shape().num_dims() == 1);
49 assert(input()->shape().num_dims() == 4);
50 assert(filter()->shape().num_dims() == 4);
51 assert(input()->element_type() == DataType::FLOAT32 || input()->element_type() == DataType::U8 ||
52 input()->element_type() == DataType::S16);
53 assert(input()->element_type() == output()->element_type());
54 assert(input()->shape().dim(3) == filter()->shape().dim(3));
55
56 const int num_dims = output_shape()->shape().dim(0);
57 Shape out_shape(num_dims);
58 const auto *shape_data = getTensorData<int32_t>(output_shape());
59 for (int i = 0; i < num_dims; i++)
60 out_shape.dim(i) = shape_data[i];
61 output()->resize(out_shape);
62
63 const int32_t filter_height = filter()->shape().dim(1);
64 const int32_t filter_width = filter()->shape().dim(2);
65 const int32_t output_height = out_shape.dim(1);
66 const int32_t output_width = out_shape.dim(2);
67
68 const int32_t unused_output_height =
69 computeOutputSize(params().padding, output_height, filter_height, params().stride_height, 1);
70 const int32_t unused_output_width =
71 computeOutputSize(params().padding, output_width, filter_width, params().stride_width, 1);
72
73 _padding_height =
74 computePadding(params().stride_height, 1, output_height, filter_height, unused_output_height);
75 _padding_width =
76 computePadding(params().stride_width, 1, output_width, filter_width, unused_output_width);
77
78 if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
79 {
80 auto scratch_tensor = getOutputTensors()[1];
81 scratch_tensor->resize(output()->shape());
82 const std::vector<double> real_multipliers =
83 getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
84
85 _quant_multipliers = quantizeMultipliers(real_multipliers);
86 }
87 else
88 {
89 auto scratch_tensor = getOutputTensors()[1];
90 scratch_tensor->set_allocatable(false);
91 }
92}
93
95{
96 switch (input()->element_type())
97 {
98 case DataType::FLOAT32:
99 evalFloat();
100 break;
101 case DataType::U8:
102 if (filter()->scales().size() == 1)
103 {
104 evalQuantized();
105 }
106 else if (filter()->scales().size() > 1)
107 {
108 LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
109 LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
110 static_cast<size_t>(filter()->shape().dim(0)));
111 evalQuantizedPerChannel();
112 }
113 break;
114 case DataType::S16:
115 evalQuantizedS16();
116 break;
117 default:
118 throw std::runtime_error("luci-intp TransposeConv Unsupported type.");
119 }
120}
121
122void TransposeConv::evalFloat() const
123{
124 float activation_min{};
125 float activation_max{};
126 // TODO support activation
127 assert(_params.activation == Activation::NONE);
128 calculateActivationRange(Activation::NONE, &activation_min, &activation_max);
129
130 tflite::ConvParams op_params{};
131 op_params.padding_type = tflite::PaddingType::kSame;
132 op_params.padding_values.height = _padding_height;
133 op_params.padding_values.width = _padding_width;
134 op_params.stride_height = params().stride_height;
135 op_params.stride_width = params().stride_width;
136 op_params.float_activation_min = activation_min;
137 op_params.float_activation_max = activation_max;
138 tflite::reference_ops::TransposeConv(op_params, //
139 getTensorShape(input()), getTensorData<float>(input()), //
140 getTensorShape(filter()), getTensorData<float>(filter()), //
141 getTensorShape(bias()), getTensorData<float>(bias()), //
142 getTensorShape(output()), getTensorData<float>(output()), //
143 tflite::RuntimeShape(), nullptr);
144}
145
146void TransposeConv::evalQuantized() const
147{
148 tflite::ConvParams op_params{};
149 op_params.padding_type = tflite::PaddingType::kSame;
150 op_params.padding_values.height = _padding_height;
151 op_params.padding_values.width = _padding_width;
152 op_params.stride_height = params().stride_height;
153 op_params.stride_width = params().stride_width;
154 // The kernel expects input and filter zero points to be negated.
155 op_params.input_offset = -input()->zero_point(); // Note the '-'.
156 op_params.weights_offset = -filter()->zero_point(); // Note the '-'.
157 op_params.output_offset = output()->zero_point();
158 op_params.output_multiplier = _quant_multipliers[0].multiplier;
159 op_params.output_shift = _quant_multipliers[0].shift;
160 op_params.quantized_activation_min = std::numeric_limits<uint8_t>::min();
161 op_params.quantized_activation_max = std::numeric_limits<uint8_t>::max();
162
163 auto scratch_tensor = getOutputTensors()[1];
164
165 tflite::reference_ops::TransposeConv(op_params, //
166 getTensorShape(input()), getTensorData<uint8>(input()), //
167 getTensorShape(filter()), getTensorData<uint8>(filter()), //
168 getTensorShape(bias()), getTensorData<int32_t>(bias()), //
169 getTensorShape(output()), getTensorData<uint8>(output()), //
170 tflite::RuntimeShape(), nullptr, //
171 getTensorData<int32_t>(scratch_tensor));
172}
173
174void TransposeConv::evalQuantizedPerChannel() const
175{
176 const auto *input_data = getTensorData<uint8_t>(input());
177 const auto *filter_data = getTensorData<uint8_t>(filter());
178 const auto *bias_data = getTensorData<int32_t>(bias());
179 auto *output_data = getTensorData<uint8_t>(output());
180
181 auto scratch_tensor = getOutputTensors()[1];
182 auto *scratch_data = getTensorData<int32_t>(scratch_tensor);
183
184 const Shape &input_shape = input()->shape();
185 const Shape &filter_shape = filter()->shape();
186 const Shape &output_shape = output()->shape();
187
188 const int32_t batches = input_shape.dim(0);
189 const int32_t input_height = input_shape.dim(1);
190 const int32_t input_width = input_shape.dim(2);
191 const int32_t input_depth = input_shape.dim(3);
192 const int32_t output_depth = filter_shape.dim(0);
193 const int32_t filter_height = filter_shape.dim(1);
194 const int32_t filter_width = filter_shape.dim(2);
195 const int32_t output_height = output_shape.dim(1);
196 const int32_t output_width = output_shape.dim(2);
197
198 const int32_t stride_height = _params.stride_height;
199 const int32_t stride_width = _params.stride_width;
200
201 int32_t activation_min{};
202 int32_t activation_max{};
203 calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
204
205 std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int32_t));
206
207 BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
208 for (int32_t batch = 0; batch < batches; ++batch)
209 {
210 for (int32_t in_y = 0; in_y < input_height; ++in_y)
211 {
212 for (int32_t in_x = 0; in_x < input_width; ++in_x)
213 {
214 for (int32_t in_c = 0; in_c < input_depth; ++in_c)
215 {
216 const int32_t out_y_origin = in_y * stride_height - _padding_height;
217 const int32_t out_x_origin = in_x * stride_width - _padding_width;
218 for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
219 {
220 for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
221 {
222 const int32_t out_x = out_x_origin + filter_x;
223 const int32_t out_y = out_y_origin + filter_y;
224 if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
225 {
226 for (int32_t out_c = 0; out_c < output_depth; ++out_c)
227 {
228 const uint8_t input_val =
229 input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
230 const uint8_t filter_val =
231 filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
232 scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
233 static_cast<int32_t>(input_val - input()->zero_point()) *
234 static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
235 }
236 }
237 }
238 }
239 }
240 }
241 }
242 for (int32_t out_y = 0; out_y < output_height; ++out_y)
243 {
244 for (int32_t out_x = 0; out_x < output_width; ++out_x)
245 {
246 for (int32_t out_c = 0; out_c < output_depth; ++out_c)
247 {
248 int32_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
249 if (bias_data)
250 {
251 acc += bias_data[out_c];
252 }
253
254 int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
255 acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
256
257 scaled_acc += output()->zero_point();
258 scaled_acc = std::max(scaled_acc, activation_min);
259 scaled_acc = std::min(scaled_acc, activation_max);
260
261 output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
262 }
263 }
264 }
265 }
266}
267
268void TransposeConv::evalQuantizedS16() const
269{
270 const auto *input_data = getTensorData<int16_t>(input());
271 const auto *filter_data = getTensorData<int16_t>(filter());
272 const auto *bias_data = getTensorData<int64_t>(bias());
273 auto *output_data = getTensorData<int16_t>(output());
274
275 auto scratch_tensor = getOutputTensors()[1];
276 auto *scratch_data = getTensorData<int64_t>(scratch_tensor);
277
278 const Shape &input_shape = input()->shape();
279 const Shape &filter_shape = filter()->shape();
280 const Shape &output_shape = output()->shape();
281
282 const int32_t batches = input_shape.dim(0);
283 const int32_t input_height = input_shape.dim(1);
284 const int32_t input_width = input_shape.dim(2);
285 const int32_t input_depth = input_shape.dim(3);
286 const int32_t output_depth = filter_shape.dim(0);
287 const int32_t filter_height = filter_shape.dim(1);
288 const int32_t filter_width = filter_shape.dim(2);
289 const int32_t output_height = output_shape.dim(1);
290 const int32_t output_width = output_shape.dim(2);
291
292 const int32_t stride_height = _params.stride_height;
293 const int32_t stride_width = _params.stride_width;
294
295 int32_t activation_min{};
296 int32_t activation_max{};
297 calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
298
299 std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int64_t));
300
301 BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
302 for (int32_t batch = 0; batch < batches; ++batch)
303 {
304 for (int32_t in_y = 0; in_y < input_height; ++in_y)
305 {
306 for (int32_t in_x = 0; in_x < input_width; ++in_x)
307 {
308 for (int32_t in_c = 0; in_c < input_depth; ++in_c)
309 {
310 const int32_t out_y_origin = in_y * stride_height - _padding_height;
311 const int32_t out_x_origin = in_x * stride_width - _padding_width;
312 for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
313 {
314 for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
315 {
316 const int32_t out_x = out_x_origin + filter_x;
317 const int32_t out_y = out_y_origin + filter_y;
318 if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
319 {
320 for (int32_t out_c = 0; out_c < output_depth; ++out_c)
321 {
322 const int16_t input_val =
323 input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
324 const int16_t filter_val =
325 filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
326 scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
327 static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
328 }
329 }
330 }
331 }
332 }
333 }
334 }
335 for (int32_t out_y = 0; out_y < output_height; ++out_y)
336 {
337 for (int32_t out_x = 0; out_x < output_width; ++out_x)
338 {
339 for (int32_t out_c = 0; out_c < output_depth; ++out_c)
340 {
341 int64_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
342 if (bias_data)
343 {
344 acc += bias_data[out_c];
345 }
346 int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
347 acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
348
349 scaled_acc = std::max(scaled_acc, activation_min);
350 scaled_acc = std::min(scaled_acc, activation_max);
351
352 output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
353 }
354 }
355 }
356 }
357}
358
359} // namespace kernels
360} // namespace luci_interpreter
const std::vector< Tensor * > & getOutputTensors() const
Definition Kernel.h:40
const TransposeConvParams & params() const
Definition Kernel.h:67
int32_t dim(int i) const
Definition Tensor.h:41
void resize(const Shape &new_shape)
Definition Tensor.cpp:56
const Shape & shape() const
Definition Tensor.h:107
static int32_t dim(const circle::Tensor *circle_tensor, int i)
Definition Tensor.h:225
const std::vector< int32_t > & zero_points() const
Definition Tensor.h:123
int32_t zero_point() const
Definition Tensor.h:115
TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input, const Tensor *bias, Tensor *output, Tensor *scratch_tensor, const TransposeConvParams &params)
#define LUCI_INTERPRETER_CHECK(cond)
Definition Utils.h:36
const luci_interpreter::RuntimeShape output_shape
list input_data
Definition infer.py:29
int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size, int32_t filter_size, int32_t out_size)
Definition Utils.h:41
int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
Definition Utils.h:75
std::vector< ChannelQuantMultipliers > quantizeMultipliers(const std::vector< double > &effective_scale)
Definition Utils.h:170
tflite::RuntimeShape getTensorShape(const Tensor *tensor)
Definition Utils.h:194
void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
Definition Utils.cpp:52
void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max)
Definition Utils.cpp:119
int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size, int32_t stride, int32_t dilation_rate=1)
Definition Utils.h:59
std::vector< double > getQuantizedConvolutionMultiplers(float input_scale, const std::vector< float > &filter_scale, float output_scale)
Definition Utils.h:147
int32_t size[5]
Definition Slice.cpp:35
Definition Shape.h:28