19#include "kernels/Utils.h"
// Float32 Conv2D path: fetch the variable input/output buffers and the
// constant filter/bias buffers from the runtime graph, then dispatch to the
// platform abstraction layer (PAL) Conv kernel.
// NOTE(review): this listing is an extracted fragment — the embedded numbers
// appear to be line numbers from the original file, and the parameter-list
// close, braces, and several statements (e.g. the `params` / output-shape
// setup between the elided line numbers) are missing from this view.
31void evalFloat(
const circle::Tensor *input,
const circle::Tensor *filter,
32 const circle::Tensor *bias,
const circle::Tensor *output,
// Mutable tensors come from getDataByTensor; filter/bias are graph constants.
37 auto *
input_data = runtime_graph->getDataByTensor(input);
38 auto *
output_data = runtime_graph->getDataByTensor(output);
40 auto *filter_data = runtime_graph->getConstDataByTensor(filter);
41 auto *bias_data = runtime_graph->getConstDataByTensor(bias);
// Fixed-size shape scratch arrays; presumably filled by getTensorDims
// (see helper listing at end of file) — the filling calls are elided here.
43 int32_t input_shape[kMaxSmallSize];
46 int32_t filter_shape[kMaxSmallSize];
// Hand off to the PAL float convolution (bias/output-shape args elided in
// this extract between original lines 53 and 55).
52 luci_interpreter_pal::Conv(params, input_shape, kernels::getTensorData<float>(input_data),
53 filter_shape, kernels::getTensorData<float>(filter_data),
55 kernels::getTensorData<float>(output_data));
// Uint8 per-tensor quantized Conv2D path: same buffer/shape bookkeeping as
// evalFloat, but the PAL Conv call uses uint8_t activations/weights and an
// int32_t bias accumulator type.
// NOTE(review): extracted fragment — embedded numbers look like original
// file line numbers; parameter-list close, braces and some setup statements
// are elided from this view.
62void evalQuantized(
const circle::Tensor *input,
const circle::Tensor *filter,
63 const circle::Tensor *bias,
const circle::Tensor *output,
// Variable tensors from the runtime graph; constant filter/bias data.
68 auto *
input_data = runtime_graph->getDataByTensor(input);
69 auto *
output_data = runtime_graph->getDataByTensor(output);
71 auto *filter_data = runtime_graph->getConstDataByTensor(filter);
72 auto *bias_data = runtime_graph->getConstDataByTensor(bias);
// Shape scratch arrays — fill calls (getTensorDims) elided in this extract.
74 int32_t input_shape[kMaxSmallSize];
77 int32_t filter_shape[kMaxSmallSize];
// PAL quantized convolution: uint8 in/filter/out, int32 bias.
83 luci_interpreter_pal::Conv(params, input_shape, kernels::getTensorData<uint8_t>(input_data),
84 filter_shape, kernels::getTensorData<uint8_t>(filter_data),
85 kernels::getTensorData<int32_t>(bias_data),
output_shape,
86 kernels::getTensorData<uint8_t>(output_data));
// Per-channel quantized Conv2D. Two paths are visible in this extract:
//   1. S8: dispatch to the optimized PAL QuantizedConvPerChannel kernel.
//   2. Otherwise (uint8 data is read below): a reference nested-loop
//      implementation with per-output-channel zero points and multipliers.
// NOTE(review): extracted fragment — embedded numbers look like original
// file line numbers; braces, `params` setup, and several statements
// (e.g. input_width/input_depth/batches definitions, the accumulator
// declaration, and the output store) are elided from this view.
89void evalQuantizedPerChannel(
const circle::Tensor *input,
const circle::Tensor *filter,
90 const circle::Tensor *bias,
const circle::Tensor *output,
// Raw byte buffers; "raw_" prefix because they are re-cast per data type.
94 auto *raw_input_data = runtime_graph->getDataByTensor(input);
95 auto *raw_output_data = runtime_graph->getDataByTensor(output);
97 auto *raw_filter_data = runtime_graph->getConstDataByTensor(filter);
98 auto *raw_bias_data = runtime_graph->getConstDataByTensor(bias);
// Fast path: int8 per-channel conv goes straight to the PAL kernel.
102 if (type == DataType::S8)
104 int32_t input_shape[kMaxSmallSize];
107 int32_t filter_shape[kMaxSmallSize];
113 luci_interpreter_pal::QuantizedConvPerChannel(
114 params, input_shape, kernels::getTensorData<int8_t>(raw_input_data), filter_shape,
115 kernels::getTensorData<int8_t>(raw_filter_data),
116 kernels::getTensorData<int32_t>(raw_bias_data),
output_shape,
117 kernels::getTensorData<int8_t>(raw_output_data));
// --- Reference path ---------------------------------------------------
// Geometry. Layout is presumably NHWC: dim 1 = height, dim 2 = width,
// filter dim 0 = output channels — TODO confirm against Tensor::dim docs.
123 const int32_t input_height =
Tensor::dim(input, 1);
126 const int32_t output_depth =
Tensor::dim(filter, 0);
127 const int32_t filter_height =
Tensor::dim(filter, 1);
128 const int32_t filter_width =
Tensor::dim(filter, 2);
129 const int32_t output_height =
Tensor::dim(output, 1);
130 const int32_t output_width =
Tensor::dim(output, 2);
// Convolution hyper-parameters, copied out of `params` for the inner loops.
132 const int32_t padding_h = params.padding_values.height;
133 const int32_t padding_w = params.padding_values.width;
134 const int32_t stride_height = params.stride_height;
135 const int32_t stride_width = params.stride_width;
136 const int32_t dilation_height_factor = params.dilation_height_factor;
137 const int32_t dilation_width_factor = params.dilation_width_factor;
// Fused-activation clamp range, already in the quantized domain.
139 const int32_t activation_min = params.quantized_activation_min;
140 const int32_t activation_max = params.quantized_activation_max;
// Effective per-channel scales = input_scale * filter_scale[c] / output_scale
// (see getQuantizedConvolutionMultiplers in the helper listing).
143 Tensor::scale(input), Tensor::scales(filter), Tensor::scale(output));
// Typed views over the raw buffers; all four must be present.
147 const auto *
input_data = kernels::getTensorData<uint8_t>(raw_input_data);
148 assert(input_data !=
nullptr);
149 const auto *filter_data = kernels::getTensorData<uint8_t>(raw_filter_data);
150 assert(filter_data !=
nullptr);
151 const auto *bias_data = kernels::getTensorData<int32_t>(raw_bias_data);
152 assert(bias_data !=
nullptr);
153 auto *
output_data = kernels::getTensorData<uint8_t>(raw_output_data);
154 assert(output_data !=
nullptr);
// Quantize the per-channel multipliers once, wrapped so a single-channel
// scale broadcasts across all output channels.
156 const std::vector<kernels::ChannelQuantMultipliers> multipliers_raw =
158 kernels::BroadcastableWrapper<kernels::ChannelQuantMultipliers> quant_multipliers(
// Reference loop nest: batch -> output row -> output col -> output channel.
161 for (int32_t batch = 0; batch < batches; ++batch)
163 for (int32_t out_y = 0; out_y < output_height; ++out_y)
165 for (int32_t out_x = 0; out_x < output_width; ++out_x)
167 for (int32_t out_c = 0; out_c < output_depth; ++out_c)
// Top-left corner of the receptive field (may be negative under padding).
169 const int32_t in_y_origin = out_y * stride_height - padding_h;
170 const int32_t in_x_origin = out_x * stride_width - padding_w;
172 for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
174 for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
176 const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
177 const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
// Skip taps that fall in the padding region (implicit zero padding).
178 if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
180 for (int32_t in_c = 0; in_c < input_depth; ++in_c)
182 const uint8_t input_val =
184 const uint8_t filter_val =
// Accumulate in int32 after removing zero points; the filter uses a
// per-output-channel zero point, the input a per-tensor one.
186 acc +=
static_cast<int32_t
>(input_val - Tensor::zero_point(input)) *
187 static_cast<int32_t
>(filter_val - Tensor::zero_points(filter)[out_c]);
194 acc += bias_data[out_c];
// Rescale with the channel's fixed-point multiplier/shift, then re-center
// on the output zero point and clamp to the fused-activation range.
198 acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift);
200 scaled_acc += Tensor::zero_point(output);
201 scaled_acc = std::max(scaled_acc, activation_min);
202 scaled_acc = std::min(scaled_acc, activation_max);
// Configure/validation stage for CircleConv2D: checks that the input/filter
// type combination is supported, that per-channel S8 filters have all-zero
// zero points, that bias (when rank-1) matches the output depth, and that
// the fused activation is one of the supported kinds.
// NOTE(review): extracted fragment — the kernel/validation macro calls
// between the visible lines (presumably LUCI_INTERPRETER_CHECK) are elided;
// embedded numbers look like original file line numbers.
217 const auto input = kernel.
input();
218 const auto filter = kernel.
filter();
219 const auto bias = kernel.
bias();
220 const auto output = kernel.
output();
// Filter must be a constant tensor (data resolvable at configure time).
224 assert(filter_data !=
nullptr);
226 const auto *options = cur_op->builtin_options_as_Conv2DOptions();
// Supported type pairs: F32/F32, U8/U8, S8/S8 (per-channel), S16/S16.
228 if (Tensor::element_type(input) == DataType::FLOAT32 &&
229 Tensor::element_type(filter) == DataType::FLOAT32)
234 else if (Tensor::element_type(input) == DataType::U8 &&
235 Tensor::element_type(filter) == DataType::U8)
239 else if (Tensor::element_type(input) == DataType::S8 &&
240 Tensor::element_type(filter) == DataType::S8)
// Per-channel S8 requires symmetric filter quantization, i.e. every
// channel zero point must be zero — checked channel by channel here.
246 for (
auto zerop : Tensor::zero_points(filter))
251 else if (Tensor::element_type(input) == DataType::S16 &&
252 Tensor::element_type(filter) == DataType::S16)
259 assert(
false &&
"Unsupported type.");
// A rank-1 bias must have exactly output_depth (= filter dim 0) elements.
264 const int32_t output_depth =
Tensor::dim(filter, 0);
268 (Tensor::num_dims(bias) == 1 &&
Tensor::dim(bias, 0) == output_depth));
// Only these fused activations are accepted; anything else is a hard error.
270 switch (options->fused_activation_function())
272 case circle::ActivationFunctionType_NONE:
273 case circle::ActivationFunctionType_RELU:
274 case circle::ActivationFunctionType_RELU6:
275 case circle::ActivationFunctionType_RELU_N1_TO_1:
278 assert(
false &&
"Unsupported fused activation");
// Execute stage for CircleConv2D: dispatch on the input element type to the
// matching eval function — float, per-tensor U8 quantized (single filter
// scale), or per-channel quantized (multiple filter scales).
// NOTE(review): extracted fragment — switch braces and some case labels are
// elided; embedded numbers look like original file line numbers.
286 const auto input = kernel.
input();
287 const auto weights = kernel.
filter();
288 const auto bias = kernel.
bias();
289 const auto output = kernel.
output();
291 const auto *options = cur_op->builtin_options_as_Conv2DOptions();
293 const auto type = Tensor::element_type(input);
// Float path: both input and weights must be FLOAT32.
297 case DataType::FLOAT32:
298 if (Tensor::element_type(weights) == DataType::FLOAT32)
300 evalFloat(input, weights, bias, output, options, runtime_graph);
// Quantized path: a single filter scale + U8 input selects the per-tensor
// kernel; more than one filter scale selects the per-channel kernel.
307 if (Tensor::scales(weights).
size() == 1 and type == DataType::U8)
309 evalQuantized(input, weights, bias, output, options, runtime_graph);
311 else if (Tensor::scales(weights).
size() > 1)
316 evalQuantizedPerChannel(input, weights, bias, output, options, runtime_graph, type);
320 assert(
false &&
"Unsupported yet.");
325 assert(
false &&
"Unsupported type.");
// ---------------------------------------------------------------------------
// Cross-reference listing (not part of the kernel source): declarations of
// the helpers, types and macros used above, apparently appended by the tool
// that produced this extract. They are declared in other project headers.
// ---------------------------------------------------------------------------
uint8_t * getConstDataByTensor(const circle::Tensor *raw_tensor)
const circle::Tensor * output() const
const circle::Tensor * input() const
const circle::Tensor * filter() const
const circle::Tensor * bias() const
#define LUCI_INTERPRETER_CHECK(cond)
const luci_interpreter::RuntimeShape output_shape
DataType
"scalar" value type
int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
std::vector< ChannelQuantMultipliers > quantizeMultipliers(const std::vector< double > &effective_scale)
std::vector< double > getQuantizedConvolutionMultiplers(float input_scale, const std::vector< float > &filter_scale, float output_scale)
void getTensorDims(const circle::Tensor *tensor, BaseRuntimeGraph *runtime_graph, int32_t *dims)
int32_t multiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift)
luci_interpreter_pal::ConvParams createConv2DParams(const circle::Tensor *input, const circle::Tensor *filter, const circle::Tensor *output, const circle::Conv2DOptions *options)
RuntimeGraph BaseRuntimeGraph
void execute_kernel_CircleConv2D(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
void configure_kernel_CircleConv2D(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
const loco::Dimension & dim(uint32_t axis) const