ONE/onert-micro_2luci-interpreter_2src_2kernels_2_add_8cpp_source.html

/*

 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved

 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.

 *

 * Licensed under the Apache License, Version 2.0 (the "License");

 * you may not use this file except in compliance with the License.

 * You may obtain a copy of the License at

 *

 *    http://www.apache.org/licenses/LICENSE-2.0

 *

 * Unless required by applicable law or agreed to in writing, software

 * distributed under the License is distributed on an "AS IS" BASIS,

 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 * See the License for the specific language governing permissions and

 * limitations under the License.

 */


#include "Builders.h"

#include "kernels/Utils.h"


#include "kernels/BinaryOpCommon.h"


#include "PALAdd.h"


namespace luci_interpreter

{


namespace

{


#ifndef DIS_QUANT

/**
 * @brief Element-wise quantized Add for S8 and S16 tensors (no broadcasting).
 *
 * Builds the ArithmeticParams (offsets, left shift, fixed-point multipliers and the
 * quantized activation range) from the tensors' quantization parameters and then
 * dispatches to luci_interpreter_pal::Add with the element type selected by @p type.
 *
 * @param input1        first operand tensor
 * @param input2        second operand tensor
 * @param output        result tensor
 * @param options       Add options; only the fused activation function is read
 * @param runtime_graph graph used to resolve tensor shapes and data buffers
 * @param type          element type of the tensors; must be DataType::S8 or DataType::S16
 */
void evalQuantized(const circle::Tensor *input1, const circle::Tensor *input2,
                   const circle::Tensor *output, const circle::AddOptions *options,
                   BaseRuntimeGraph *runtime_graph, DataType type)
{
  // The type test is parenthesised so the message string applies to the whole
  // condition instead of relying on `&&` binding tighter than `or`.
  assert((type == DataType::S16 or type == DataType::S8) && "Wrong Type");

  luci_interpreter_pal::ArithmeticParams params{};
  luci_interpreter::RuntimeShape input_shape1 =
    kernels::getTensorRuntimeShape(input1, runtime_graph);
  luci_interpreter::RuntimeShape input_shape2 =
    kernels::getTensorRuntimeShape(input2, runtime_graph);

  const bool need_broadcast =
    luci_interpreter_pal::ProcessBroadcastShapes(input_shape1, input_shape2, &params);

  assert(need_broadcast == false && "Broadcast for INT8 and INT16 not supported now");
  // need_broadcast is only read by the assert above; keep NDEBUG builds warning-free.
  (void)need_broadcast;

  // Inputs are shifted by the negated zero point, the output by its zero point.
  params.input1_offset = -Tensor::zero_point(input1);
  params.input2_offset = -Tensor::zero_point(input2);
  params.output_offset = Tensor::zero_point(output);
  params.left_shift = (type == DataType::S16) ? 15 : 20;

  const auto input1_scale = Tensor::scale(input1);
  const auto input2_scale = Tensor::scale(input2);
  const auto output_scale = Tensor::scale(output);

  // Both input scales are normalised by twice the larger of the two, so each real
  // input multiplier is at most 0.5; the output multiplier undoes that normalisation
  // together with the left shift.
  const double twice_max_input_scale =
    2 * static_cast<double>(std::max(input1_scale, input2_scale));
  const double real_input1_multiplier = static_cast<double>(input1_scale / twice_max_input_scale);
  const double real_input2_multiplier = static_cast<double>(input2_scale / twice_max_input_scale);
  const double real_output_multiplier =
    twice_max_input_scale / ((1 << params.left_shift) * static_cast<double>(output_scale));

  kernels::quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &params.input1_multiplier,
                                               &params.input1_shift);
  kernels::quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &params.input2_multiplier,
                                               &params.input2_shift);
  kernels::quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &params.output_multiplier,
                                               &params.output_shift);

  kernels::calculateActivationRangeQuantized(luci_actfunc(options->fused_activation_function()),
                                             output, &params.quantized_activation_min,
                                             &params.quantized_activation_max);

  // The element count is taken from the first input's shape.
  if (type == DataType::S8)
  {
    luci_interpreter_pal::Add(
      params, input_shape1.flatSize(),
      kernels::getTensorData<int8_t>(runtime_graph->getDataByTensor(input1)),
      kernels::getTensorData<int8_t>(runtime_graph->getDataByTensor(input2)),
      kernels::getTensorData<int8_t>(runtime_graph->getDataByTensor(output)));
  }
  else
  {
    luci_interpreter_pal::Add(
      params, input_shape1.flatSize(),
      kernels::getTensorData<int16_t>(runtime_graph->getDataByTensor(input1)),
      kernels::getTensorData<int16_t>(runtime_graph->getDataByTensor(input2)),
      kernels::getTensorData<int16_t>(runtime_graph->getDataByTensor(output)));
  }
}

#endif // DIS_QUANT


} // namespace


/**
 * @brief Validates the tensors of a Circle Add operator before execution.
 *
 * Checks that both inputs and the output share one element type. Unless quantized
 * kernels are disabled (DIS_QUANT), S16 inputs are additionally required to carry
 * exactly one zero point each, and the zero points of both inputs and of the output
 * must be 0.
 *
 * @param cur_op        the Add operator being configured
 * @param runtime_graph graph that owns the operator's tensors
 */
void configure_kernel_CircleAdd(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
{
  kernels::TISOKernel kernel(cur_op, runtime_graph);

  LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
                         Tensor::element_type(kernel.input2()));
  // The original repeated the input1/input2 comparison here, leaving the output
  // type unchecked even though execution picks the kernel from input1's type alone.
  LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
                         Tensor::element_type(kernel.output()));

#ifndef DIS_QUANT
  if (Tensor::element_type(kernel.input1()) == DataType::S16)
  {
    LUCI_INTERPRETER_CHECK(Tensor::zero_points(kernel.input1()).size() == 1 &&
                           Tensor::zero_points(kernel.input2()).size() == 1);
    LUCI_INTERPRETER_CHECK(Tensor::zero_point(kernel.input1()) == 0 &&
                           Tensor::zero_point(kernel.input2()) == 0 &&
                           Tensor::zero_point(kernel.output()) == 0);
  }
#endif // DIS_QUANT
}

void configure_kernel_CircleAdd(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph) {…}


/**
 * @brief Executes a Circle Add operator.
 *
 * Resolves the runtime shapes of both inputs and of the output, optionally handles
 * the dynamic-shape case (unless DIS_DYN_SHAPES is defined), and then dispatches on
 * the element type of the first input:
 *  - FLOAT32 (unless DIS_FLOAT), S64, S32: generic TISO kernels, in-place or not;
 *  - S8 / S16 (unless DIS_QUANT): evalQuantized.
 * Any other type hits an assert.
 *
 * @param cur_op        the Add operator to execute
 * @param runtime_graph graph that owns the operator's tensors and their data
 */
void execute_kernel_CircleAdd(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
{
  kernels::TISOKernel kernel(cur_op, runtime_graph);

  const auto *options = cur_op->builtin_options_as_AddOptions();

  luci_interpreter::RuntimeShape input_shape1 =
    kernels::getTensorRuntimeShape(kernel.input1(), runtime_graph);
  luci_interpreter::RuntimeShape input_shape2 =
    kernels::getTensorRuntimeShape(kernel.input2(), runtime_graph);

  const bool is_inplace = runtime_graph->is_inplace_op(cur_op);

  // Declared outside the DIS_DYN_SHAPES guard: the type dispatch below passes it to
  // the kernels unconditionally, so it must exist in every build configuration.
  luci_interpreter::RuntimeShape output_shape =
    kernels::getTensorRuntimeShape(kernel.output(), runtime_graph);

  // TODO remove code duplication, introduce func
#ifndef DIS_DYN_SHAPES
  // Dynamic shape case: neither input matches the recorded output shape.
  if (not is_inplace and not(input_shape1 == output_shape) and not(input_shape2 == output_shape))
  {
    // The output takes the shape of the input with the larger element count
    // (input2 on a tie).
    output_shape =
      (input_shape1.flatSize() > input_shape2.flatSize()) ? input_shape1 : input_shape2;
    const int32_t num_dims = output_shape.dimensionsCount();

    luci_interpreter::RuntimeShape dynamic_shape(num_dims);
    int32_t data_size = 1;
    for (int i = 0; i < num_dims; ++i)
    {
      dynamic_shape.setDim(i, output_shape.dims(i));
      data_size *= output_shape.dims(i);
    }
    // Convert the element count into a byte count.
    data_size *= size(Tensor::element_type(kernel.output()));

    runtime_graph->addDynamicShapeTensor(kernel.output(), std::move(dynamic_shape));

    if (data_size == 0)
    {
      // Empty output: nothing to compute.
      runtime_graph->resetTensorData(nullptr, kernel.output());
      return;
    }

    // NOTE(review): the buffer is handed to the runtime graph, which presumably
    // takes ownership and releases it — confirm against resetTensorData.
    auto new_output_data = new uint8_t[data_size];
    runtime_graph->resetTensorData(new_output_data, kernel.output());
  }
#endif // DIS_DYN_SHAPES

  const auto type = Tensor::element_type(kernel.input1());
  switch (type)
  {
#ifndef DIS_FLOAT
    case DataType::FLOAT32:
    {
      auto tiso_func = luci_interpreter_pal::Add<float>;
      auto broadcast_tiso_func = luci_interpreter_pal::BroadcastAdd4DSlow<float>;
      if (is_inplace)
      {
        kernels::evalTISOInplaceKernel<float>(tiso_func, broadcast_tiso_func, &kernel, options,
                                              std::move(input_shape1), std::move(input_shape2),
                                              std::move(output_shape));
      }
      else
      {
        kernels::TISOData kernel_data = kernel.readData();
        kernels::evalTISOKernel<float>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
                                       options, std::move(input_shape1), std::move(input_shape2),
                                       std::move(output_shape));
      }
    }
    break;
#endif // DIS_FLOAT
    case DataType::S64:
    {
      auto tiso_func = luci_interpreter_pal::Add<int64_t>;
      auto broadcast_tiso_func = luci_interpreter_pal::BroadcastAdd4DSlow<int64_t>;
      if (is_inplace)
      {
        kernels::evalTISOInplaceKernel<int64_t>(tiso_func, broadcast_tiso_func, &kernel, options,
                                                std::move(input_shape1), std::move(input_shape2),
                                                std::move(output_shape));
      }
      else
      {
        kernels::TISOData kernel_data = kernel.readData();
        kernels::evalTISOKernel<int64_t>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
                                         options, std::move(input_shape1), std::move(input_shape2),
                                         std::move(output_shape));
      }
    }
    break;
    case DataType::S32:
    {
      auto tiso_func = luci_interpreter_pal::Add<int32_t>;
      auto broadcast_tiso_func = luci_interpreter_pal::BroadcastAdd4DSlow<int32_t>;
      if (is_inplace)
      {
        kernels::evalTISOInplaceKernel<int32_t>(tiso_func, broadcast_tiso_func, &kernel, options,
                                                std::move(input_shape1), std::move(input_shape2),
                                                std::move(output_shape));
      }
      else
      {
        kernels::TISOData kernel_data = kernel.readData();
        kernels::evalTISOKernel<int32_t>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
                                         options, std::move(input_shape1), std::move(input_shape2),
                                         std::move(output_shape));
      }
    }
    break;
#ifndef DIS_QUANT
    case DataType::S8:
    case DataType::S16:
    {
      evalQuantized(kernel.input1(), kernel.input2(), kernel.output(), options, runtime_graph,
                    type);
    }
    break;
#endif // DIS_QUANT
    default:
      assert(false && "Unsupported type.");
  }
}

void execute_kernel_CircleAdd(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph) {…}


} // namespace luci_interpreter

luci_interpreter::RuntimeGraph
Definition RuntimeGraph.h:33

luci_interpreter::RuntimeGraph::addDynamicShapeTensor
void addDynamicShapeTensor(const circle::Tensor *tensor, luci_interpreter::RuntimeShape &&shapes)
Definition RuntimeGraph.cpp:144

luci_interpreter::RuntimeGraph::resetTensorData
void resetTensorData(uint8_t *new_data, const circle::Tensor *tensor)
Definition RuntimeGraph.cpp:207

luci_interpreter::RuntimeGraph::is_inplace_op
bool is_inplace_op(const circle::Operator *op)
Definition RuntimeGraph.h:113

luci_interpreter::RuntimeShape
Definition Tensor.h:36

luci_interpreter::RuntimeShape::dimensionsCount
int32_t dimensionsCount() const
Definition Tensor.h:106

luci_interpreter::RuntimeShape::dims
int32_t dims(int i) const
Definition Tensor.h:108

luci_interpreter::RuntimeShape::setDim
void setDim(int i, int32_t val)
Definition Tensor.h:114

luci_interpreter::RuntimeShape::flatSize
int flatSize() const
Definition Tensor.h:45

luci_interpreter::kernels::TISOKernel
Definition TISOKernel.h:36

luci_interpreter::kernels::TISOKernel::readData
TISOData readData()
Definition TISOKernel.h:66

luci_interpreter::kernels::TISOKernel::output
const circle::Tensor * output() const
Definition TISOKernel.h:62

luci_interpreter::kernels::TISOKernel::input2
const circle::Tensor * input2() const
Definition TISOKernel.h:61

luci_interpreter::kernels::TISOKernel::input1
const circle::Tensor * input1() const
Definition TISOKernel.h:60

LUCI_INTERPRETER_CHECK
#define LUCI_INTERPRETER_CHECK(cond)
Definition Utils.h:36

output_shape
const luci_interpreter::RuntimeShape output_shape
Definition PALComparisons.h:32

infer.type
type
Definition infer.py:18

loco::DataType
DataType
"scalar" value type
Definition DataType.h:27

luci_interpreter::kernels::quantizeMultiplierSmallerThanOneExp
void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier, int *left_shift)
Definition Utils.cpp:193

luci_interpreter::kernels::calculateActivationRangeQuantized
void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max)
Definition Utils.cpp:119

luci_interpreter::kernels::getTensorRuntimeShape
luci_interpreter::RuntimeShape getTensorRuntimeShape(const circle::Tensor *circle_tensor, BaseRuntimeGraph *runtime_graph)
Definition Utils.cpp:29

luci_interpreter_pal::Add
void Add(const ArithmeticParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
Definition PALAddCommon.h:28

luci_interpreter_pal::ProcessBroadcastShapes
bool ProcessBroadcastShapes(const luci_interpreter::RuntimeShape &shape0, const luci_interpreter::RuntimeShape &shape1, luci_interpreter_pal::ArithmeticParams *params)
Definition ProcessBroadcastShapes.h:150

luci_interpreter
Definition BuddyMemoryManager.h:22

luci_interpreter::BaseRuntimeGraph
RuntimeGraph BaseRuntimeGraph
Definition RuntimeModule.h:39

luci_interpreter::execute_kernel_CircleAdd
void execute_kernel_CircleAdd(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
Definition Add.cpp:117

luci_interpreter::configure_kernel_CircleAdd
void configure_kernel_CircleAdd(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
Definition Add.cpp:96

luci::luci_actfunc
FusedActFunc luci_actfunc(const circle::ActivationFunctionType type)
Definition CircleReader.cpp:84

onnx_legalizer.options
options
Definition onnx_legalizer.py:1070

Builders.h

size
int32_t size[5]
Definition Slice.cpp:35

luci_interpreter::kernels::TISOData
Definition TISOKernel.h:28

luci_interpreter_pal::ArithmeticParams
Definition Params.h:178