19#include "kernels/Utils.h"
31void evalFloat(
const circle::Tensor *input,
const circle::Tensor *filter,
32 const circle::Tensor *bias,
const circle::Tensor *output,
37 auto *
input_data = runtime_graph->getDataByTensor(input);
38 auto *
output_data = runtime_graph->getDataByTensor(output);
40 auto *
filter_data = runtime_graph->getConstDataByTensor(filter);
41 auto *
bias_data = runtime_graph->getConstDataByTensor(bias);
52 luci_interpreter_pal::Conv(params,
input_shape, kernels::getTensorData<float>(input_data),
55 kernels::getTensorData<float>(output_data));
62void evalQuantized(
const circle::Tensor *input,
const circle::Tensor *filter,
63 const circle::Tensor *bias,
const circle::Tensor *output,
68 auto *
input_data = runtime_graph->getDataByTensor(input);
69 auto *
output_data = runtime_graph->getDataByTensor(output);
71 auto *
filter_data = runtime_graph->getConstDataByTensor(filter);
72 auto *
bias_data = runtime_graph->getConstDataByTensor(bias);
83 luci_interpreter_pal::Conv(params,
input_shape, kernels::getTensorData<uint8_t>(input_data),
86 kernels::getTensorData<uint8_t>(output_data));
89void evalQuantizedPerChannel(
const circle::Tensor *input,
const circle::Tensor *filter,
90 const circle::Tensor *bias,
const circle::Tensor *output,
98 auto *
raw_bias_data = runtime_graph->getConstDataByTensor(bias);
102 if (
type == DataType::S8)
113 luci_interpreter_pal::QuantizedConvPerChannel(
127 const int32_t filter_height =
Tensor::dim(filter, 1);
128 const int32_t filter_width =
Tensor::dim(filter, 2);
129 const int32_t output_height =
Tensor::dim(output, 1);
130 const int32_t output_width =
Tensor::dim(output, 2);
132 const int32_t
padding_h = params.padding_values.height;
133 const int32_t
padding_w = params.padding_values.width;
134 const int32_t stride_height = params.stride_height;
135 const int32_t stride_width = params.stride_width;
136 const int32_t dilation_height_factor = params.dilation_height_factor;
137 const int32_t dilation_width_factor = params.dilation_width_factor;
139 const int32_t activation_min = params.quantized_activation_min;
140 const int32_t activation_max = params.quantized_activation_max;
143 Tensor::scale(input), Tensor::scales(filter), Tensor::scale(output));
148 assert(input_data !=
nullptr);
154 assert(output_data !=
nullptr);
158 kernels::BroadcastableWrapper<kernels::ChannelQuantMultipliers>
quant_multipliers(
161 for (int32_t batch = 0; batch <
batches; ++batch)
186 acc +=
static_cast<int32_t
>(
input_val - Tensor::zero_point(input)) *
217 const auto input = kernel.
input();
218 const auto filter = kernel.
filter();
219 const auto bias = kernel.
bias();
220 const auto output = kernel.
output();
226 const auto *options =
cur_op->builtin_options_as_Conv2DOptions();
228 if (Tensor::element_type(input) == DataType::FLOAT32 &&
229 Tensor::element_type(filter) == DataType::FLOAT32)
234 else if (Tensor::element_type(input) == DataType::U8 &&
235 Tensor::element_type(filter) == DataType::U8)
239 else if (Tensor::element_type(input) == DataType::S8 &&
240 Tensor::element_type(filter) == DataType::S8)
246 for (
auto zerop : Tensor::zero_points(filter))
251 else if (Tensor::element_type(input) == DataType::S16 &&
252 Tensor::element_type(filter) == DataType::S16)
259 assert(
false &&
"Unsupported type.");
270 switch (options->fused_activation_function())
272 case circle::ActivationFunctionType_NONE:
273 case circle::ActivationFunctionType_RELU:
274 case circle::ActivationFunctionType_RELU6:
275 case circle::ActivationFunctionType_RELU_N1_TO_1:
278 assert(
false &&
"Unsupported fused activation");
286 const auto input = kernel.
input();
287 const auto weights = kernel.
filter();
288 const auto bias = kernel.
bias();
289 const auto output = kernel.
output();
291 const auto *options =
cur_op->builtin_options_as_Conv2DOptions();
293 const auto type = Tensor::element_type(input);
297 case DataType::FLOAT32:
298 if (Tensor::element_type(weights) == DataType::FLOAT32)
300 evalFloat(input, weights, bias, output, options, runtime_graph);
307 if (Tensor::scales(weights).
size() == 1
and type == DataType::U8)
309 evalQuantized(input, weights, bias, output, options, runtime_graph);
311 else if (Tensor::scales(weights).
size() > 1)
316 evalQuantizedPerChannel(input, weights, bias, output, options, runtime_graph,
type);
320 assert(
false &&
"Unsupported yet.");
325 assert(
false &&
"Unsupported type.");
uint8_t * getConstDataByTensor(const circle::Tensor *raw_tensor)
const circle::Tensor * output() const
const circle::Tensor * input() const
const circle::Tensor * filter() const
const circle::Tensor * bias() const
#define LUCI_INTERPRETER_CHECK(cond)
const luci_interpreter::RuntimeShape output_shape
DataType
"scalar" value type
int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
std::vector< ChannelQuantMultipliers > quantizeMultipliers(const std::vector< double > &effective_scale)
std::vector< double > getQuantizedConvolutionMultiplers(float input_scale, const std::vector< float > &filter_scale, float output_scale)
void getTensorDims(const circle::Tensor *tensor, BaseRuntimeGraph *runtime_graph, int32_t *dims)
int32_t multiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift)
luci_interpreter_pal::ConvParams createConv2DParams(const circle::Tensor *input, const circle::Tensor *filter, const circle::Tensor *output, const circle::Conv2DOptions *options)
RuntimeGraph BaseRuntimeGraph
void execute_kernel_CircleConv2D(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
void configure_kernel_CircleConv2D(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
T must_cast(loco::Node *node)
const loco::Dimension & dim(uint32_t axis) const