20#include "kernels/Utils.h"
22#include <tensorflow/lite/kernels/internal/reference/transpose_conv.h>
// NOTE(review): this chunk is fragmentary — the embedded original line numbers
// jump, so statements, braces, and function headers are missing between the
// lines below. Comments describe only what is visible; anything inferred is
// hedged and should be confirmed against the full file.
// Tail of the TransposeConv kernel constructor: the output-tensor list
// ({output, scratch_tensor}) and the forwarded params (ctor header not visible).
37 {output, scratch_tensor}, params)
// --- configure() fragment (header line not visible in this chunk) ---
// Input and filter tensors must both be rank-4.
49 assert(
input()->shape().num_dims() == 4);
50 assert(
filter()->shape().num_dims() == 4);
// Supported input element types: FLOAT32, U8, or S16.
51 assert(
input()->element_type() == DataType::FLOAT32 ||
input()->element_type() == DataType::U8 ||
52 input()->element_type() == DataType::S16);
// Output element type must match the input's.
53 assert(
input()->element_type() ==
output()->element_type());
// Innermost (channel) dimension of input and filter must agree.
54 assert(
input()->shape().dim(3) ==
filter()->shape().dim(3));
// Build the output Shape from the int32 data of the output_shape() tensor.
// (Declaration of num_dims is in a missing line — presumably the rank of
// output_shape(); confirm against the full source.)
57 Shape out_shape(num_dims);
58 const auto *shape_data = getTensorData<int32_t>(
output_shape());
59 for (
int i = 0; i < num_dims; i++)
60 out_shape.
dim(i) = shape_data[i];
// out_shape is NHWC here: dim(1)=height, dim(2)=width (per the indices used).
65 const int32_t output_height = out_shape.
dim(1);
66 const int32_t output_width = out_shape.
dim(2);
// Right-hand sides of these two lines are in missing lines — likely
// computeOutputSize(...) sanity checks; confirm against the full source.
68 const int32_t unused_output_height =
70 const int32_t unused_output_width =
// Quantized paths (U8/S16) need the int accumulator scratch tensor and the
// per-channel effective multipliers.
78 if (
input()->element_type() == DataType::U8 ||
input()->element_type() == DataType::S16)
81 scratch_tensor->resize(
output()->shape());
// Initializer is in a missing line — presumably
// getQuantizedConvolutionMultiplers(...); confirm.
82 const std::vector<double> real_multipliers =
// For the non-quantized path the scratch tensor is not needed, so it is
// marked non-allocatable (the surrounding else-branch brace is not visible).
90 scratch_tensor->set_allocatable(
false);
// --- execute() fragment (header line not visible) ---
// Dispatch on input element type; the case bodies between the visible lines
// are missing.
96 switch (
input()->element_type())
98 case DataType::FLOAT32:
// Fragment of a size/count check against the filter's output-channel dim
// (dim(0)); the full expression is in missing lines.
110 static_cast<size_t>(
filter()->shape().dim(0)));
111 evalQuantizedPerChannel();
// Default case: reject unsupported element types.
118 throw std::runtime_error(
"luci-intp TransposeConv Unsupported type.");
// Float path: fills a tflite::ConvParams and delegates to the TFLite
// reference TransposeConv kernel. NOTE(review): several lines are missing
// from this fragment (activation-range computation, the tensor-shape/data
// arguments of the call, and the closing brace) — comments cover only what
// is visible.
122void TransposeConv::evalFloat()
const
// Activation clamp bounds; presumably filled by calculateActivationRange()
// in a missing line — confirm against the full source.
124 float activation_min{};
125 float activation_max{};
130 tflite::ConvParams op_params{};
// Padding type/values come from precomputed members; stride fields are set
// in missing lines.
131 op_params.padding_type = tflite::PaddingType::kSame;
132 op_params.padding_values.height = _padding_height;
133 op_params.padding_values.width = _padding_width;
136 op_params.float_activation_min = activation_min;
137 op_params.float_activation_max = activation_max;
// Trailing arguments: empty RuntimeShape + nullptr for the unused
// im2col/scratch buffer of the reference kernel.
138 tflite::reference_ops::TransposeConv(op_params,
143 tflite::RuntimeShape(),
nullptr);
// Uint8 (per-tensor quantized) path: fills ConvParams with the single
// requantization multiplier/shift and delegates to the TFLite reference
// kernel, passing the int32 scratch tensor as the accumulator buffer.
// NOTE(review): zero-point assignments and the shape/data arguments of the
// call are in missing lines of this fragment.
146void TransposeConv::evalQuantized()
const
148 tflite::ConvParams op_params{};
149 op_params.padding_type = tflite::PaddingType::kSame;
150 op_params.padding_values.height = _padding_height;
151 op_params.padding_values.width = _padding_width;
// Per-tensor quantization: only element [0] of _quant_multipliers is used.
158 op_params.output_multiplier = _quant_multipliers[0].multiplier;
159 op_params.output_shift = _quant_multipliers[0].shift;
// Clamp to the full uint8 range (activation fusing, if any, is handled via
// the multiplier path — confirm against the full source).
160 op_params.quantized_activation_min = std::numeric_limits<uint8_t>::min();
161 op_params.quantized_activation_max = std::numeric_limits<uint8_t>::max();
// nullptr = unused im2col buffer; the scratch tensor supplies int32
// accumulator storage for the reference kernel.
165 tflite::reference_ops::TransposeConv(op_params,
170 tflite::RuntimeShape(),
nullptr,
171 getTensorData<int32_t>(scratch_tensor));
// Uint8 per-channel quantized path, hand-rolled in two passes:
//  1) scatter-accumulate int32 products into the zeroed scratch buffer
//     (transposed-convolution scatter: each input pixel contributes to a
//     filter-sized window of the output);
//  2) add bias, requantize per output channel, clamp, and store.
// NOTE(review): this fragment is missing the input/filter/output shape and
// data declarations, zero-point handling, the accumulation statement inside
// the inner loop, and the final store — comments cover only visible lines.
174void TransposeConv::evalQuantizedPerChannel()
const
177 const auto *filter_data = getTensorData<uint8_t>(
filter());
178 const auto *bias_data = getTensorData<int32_t>(
bias());
// int32 accumulators live in the scratch tensor.
182 auto *scratch_data = getTensorData<int32_t>(scratch_tensor);
// NHWC input, OHWI filter (per the dim indices used below).
188 const int32_t batches = input_shape.dim(0);
189 const int32_t input_height = input_shape.dim(1);
190 const int32_t input_width = input_shape.dim(2);
191 const int32_t input_depth = input_shape.dim(3);
192 const int32_t output_depth = filter_shape.dim(0);
193 const int32_t filter_height = filter_shape.dim(1);
194 const int32_t filter_width = filter_shape.dim(2);
// Quantized activation clamp bounds; presumably set via
// calculateActivationRangeQuantized() in a missing line — confirm.
201 int32_t activation_min{};
202 int32_t activation_max{};
// Zero the accumulators before the scatter pass.
205 std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() *
sizeof(int32_t));
// Wrapper broadcasts a single multiplier across channels when the model is
// per-tensor quantized.
207 BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
// Pass 1: scatter input*filter products into scratch.
208 for (int32_t batch = 0; batch < batches; ++batch)
210 for (int32_t in_y = 0; in_y < input_height; ++in_y)
212 for (int32_t in_x = 0; in_x < input_width; ++in_x)
214 for (int32_t in_c = 0; in_c < input_depth; ++in_c)
// Top-left output coordinate this input pixel maps to (stride minus padding).
216 const int32_t out_y_origin = in_y * stride_height - _padding_height;
217 const int32_t out_x_origin = in_x * stride_width - _padding_width;
218 for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
220 for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
222 const int32_t out_x = out_x_origin + filter_x;
223 const int32_t out_y = out_y_origin + filter_y;
// Skip taps that fall outside the output (padding region).
224 if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
226 for (int32_t out_c = 0; out_c < output_depth; ++out_c)
// Initializers for input_val, and the accumulate-into-scratch statement,
// are in missing lines of this fragment.
228 const uint8_t input_val =
230 const uint8_t filter_val =
231 filter_data[
calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
// Pass 2: bias add, per-channel requantize, clamp, store (store statement
// not visible in this fragment).
242 for (int32_t out_y = 0; out_y < output_height; ++out_y)
244 for (int32_t out_x = 0; out_x < output_width; ++out_x)
246 for (int32_t out_c = 0; out_c < output_depth; ++out_c)
// bias_data may be null when the op has no bias — the guard, if any, is in
// a missing line; confirm against the full source.
251 acc += bias_data[out_c];
// Fixed-point rescale to the output scale using this channel's
// multiplier/shift.
254 int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
255 acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
258 scaled_acc = std::max(scaled_acc, activation_min);
259 scaled_acc = std::min(scaled_acc, activation_max);
// Int16 (symmetric quantized) path — structurally parallel to
// evalQuantizedPerChannel but with int16 input/filter, int64 bias, and
// int64 accumulators (wider to avoid overflow of 16x16-bit products).
// NOTE(review): same fragmentation caveats as the uint8 path — shape/data
// declarations, the accumulate statement, and the final store are in
// missing lines.
268void TransposeConv::evalQuantizedS16()
const
271 const auto *filter_data = getTensorData<int16_t>(
filter());
272 const auto *bias_data = getTensorData<int64_t>(
bias());
// int64 accumulators live in the scratch tensor.
276 auto *scratch_data = getTensorData<int64_t>(scratch_tensor);
// NHWC input, OHWI filter (per the dim indices used below).
282 const int32_t batches = input_shape.dim(0);
283 const int32_t input_height = input_shape.dim(1);
284 const int32_t input_width = input_shape.dim(2);
285 const int32_t input_depth = input_shape.dim(3);
286 const int32_t output_depth = filter_shape.dim(0);
287 const int32_t filter_height = filter_shape.dim(1);
288 const int32_t filter_width = filter_shape.dim(2);
// Quantized activation clamp bounds; presumably set via
// calculateActivationRangeQuantized() in a missing line — confirm.
295 int32_t activation_min{};
296 int32_t activation_max{};
// Zero the int64 accumulators before the scatter pass.
299 std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() *
sizeof(int64_t));
// Broadcasts a single multiplier across channels for per-tensor models.
301 BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
// Pass 1: scatter input*filter products into scratch.
302 for (int32_t batch = 0; batch < batches; ++batch)
304 for (int32_t in_y = 0; in_y < input_height; ++in_y)
306 for (int32_t in_x = 0; in_x < input_width; ++in_x)
308 for (int32_t in_c = 0; in_c < input_depth; ++in_c)
// Top-left output coordinate this input pixel maps to (stride minus padding).
310 const int32_t out_y_origin = in_y * stride_height - _padding_height;
311 const int32_t out_x_origin = in_x * stride_width - _padding_width;
312 for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
314 for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
316 const int32_t out_x = out_x_origin + filter_x;
317 const int32_t out_y = out_y_origin + filter_y;
// Skip taps that fall outside the output (padding region).
318 if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
320 for (int32_t out_c = 0; out_c < output_depth; ++out_c)
// input_val's initializer is in a missing line.
322 const int16_t input_val =
324 const int16_t filter_val =
325 filter_data[
calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
// Product fragment: widen both operands to int64 before multiplying so the
// 16x16-bit product cannot overflow.
327 static_cast<int64_t
>(input_val) *
static_cast<int64_t
>(filter_val);
// Pass 2: bias add, requantize, clamp, store (store statement not visible).
335 for (int32_t out_y = 0; out_y < output_height; ++out_y)
337 for (int32_t out_x = 0; out_x < output_width; ++out_x)
339 for (int32_t out_c = 0; out_c < output_depth; ++out_c)
// bias_data may be null when the op has no bias — the guard, if any, is in
// a missing line; confirm against the full source.
344 acc += bias_data[out_c];
// Fixed-point rescale to the output scale using this channel's
// multiplier/shift.
346 int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
347 acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
349 scaled_acc = std::max(scaled_acc, activation_min);
350 scaled_acc = std::min(scaled_acc, activation_max);
const std::vector< Tensor * > & getOutputTensors() const
const TransposeConvParams _params
const TransposeConvParams & params() const
void resize(const Shape &new_shape)
const Shape & shape() const
static int32_t dim(const circle::Tensor *circle_tensor, int i)
const std::vector< int32_t > & zero_points() const
int32_t zero_point() const
void configure() override
const Tensor * filter() const
void execute() const override
const Tensor * output_shape() const
TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input, const Tensor *bias, Tensor *output, Tensor *scratch_tensor, const TransposeConvParams &params)
const Tensor * bias() const
const Tensor * input() const
#define LUCI_INTERPRETER_CHECK(cond)
const luci_interpreter::RuntimeShape output_shape
int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size, int32_t filter_size, int32_t out_size)
int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
std::vector< ChannelQuantMultipliers > quantizeMultipliers(const std::vector< double > &effective_scale)
tflite::RuntimeShape getTensorShape(const Tensor *tensor)
void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max)
int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size, int32_t stride, int32_t dilation_rate=1)
std::vector< double > getQuantizedConvolutionMultiplers(float input_scale, const std::vector< float > &filter_scale, float output_scale)