ONE - On-device Neural Engine
Loading...
Searching...
No Matches
Add.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#include "Builders.h"
19#include "kernels/Utils.h"
20
21#include "kernels/BinaryOpCommon.h"
22
23#include "PALAdd.h"
24
25namespace luci_interpreter
26{
27
28namespace
29{
30
31#ifndef DIS_QUANT
32void evalQuantized(const circle::Tensor *input1, const circle::Tensor *input2,
33 const circle::Tensor *output, const circle::AddOptions *options,
34 BaseRuntimeGraph *runtime_graph, DataType type)
35{
36 assert(type == DataType::S16 or type == DataType::S8 && "Wrong Type");
37
40 kernels::getTensorRuntimeShape(input1, runtime_graph);
42 kernels::getTensorRuntimeShape(input2, runtime_graph);
43
44 const bool need_broadcast =
45 luci_interpreter_pal::ProcessBroadcastShapes(input_shape1, input_shape2, &params);
46
47 assert(need_broadcast == false && "Broadcast for INT8 and INT16 not supported now");
48
49 params.input1_offset = -Tensor::zero_point(input1);
50 params.input2_offset = -Tensor::zero_point(input2);
51 params.output_offset = Tensor::zero_point(output);
52 params.left_shift = (type == DataType::S16) ? 15 : 20;
53
54 const auto input1_scale = Tensor::scale(input1);
55 const auto input2_scale = Tensor::scale(input2);
56 const auto output_scale = Tensor::scale(output);
57
58 const double twice_max_input_scale =
59 2 * static_cast<double>(std::max(input1_scale, input2_scale));
60 const double real_input1_multiplier = static_cast<double>(input1_scale / twice_max_input_scale);
61 const double real_input2_multiplier = static_cast<double>(input2_scale / twice_max_input_scale);
62 const double real_output_multiplier =
63 twice_max_input_scale / ((1 << params.left_shift) * static_cast<double>(output_scale));
64
65 kernels::quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &params.input1_multiplier,
66 &params.input1_shift);
67 kernels::quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &params.input2_multiplier,
68 &params.input2_shift);
69 kernels::quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &params.output_multiplier,
70 &params.output_shift);
71
73 output, &params.quantized_activation_min,
74 &params.quantized_activation_max);
75 if (type == DataType::S8)
76 {
78 params, input_shape1.flatSize(),
79 kernels::getTensorData<int8_t>(runtime_graph->getDataByTensor(input1)),
80 kernels::getTensorData<int8_t>(runtime_graph->getDataByTensor(input2)),
81 kernels::getTensorData<int8_t>(runtime_graph->getDataByTensor(output)));
82 }
83 else
84 {
86 params, input_shape1.flatSize(),
87 kernels::getTensorData<int16_t>(runtime_graph->getDataByTensor(input1)),
88 kernels::getTensorData<int16_t>(runtime_graph->getDataByTensor(input2)),
89 kernels::getTensorData<int16_t>(runtime_graph->getDataByTensor(output)));
90 }
91}
92#endif // DIS_QUANT
93
94} // namespace
95
96void configure_kernel_CircleAdd(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
97{
98 kernels::TISOKernel kernel(cur_op, runtime_graph);
99
100 LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
101 Tensor::element_type(kernel.input2()));
102 LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
103 Tensor::element_type(kernel.input2()));
104
105#ifndef DIS_QUANT
106 if (Tensor::element_type(kernel.input1()) == DataType::S16)
107 {
108 LUCI_INTERPRETER_CHECK(Tensor::zero_points(kernel.input1()).size() == 1 &&
109 Tensor::zero_points(kernel.input2()).size() == 1);
110 LUCI_INTERPRETER_CHECK(Tensor::zero_point(kernel.input1()) == 0 &&
111 Tensor::zero_point(kernel.input2()) == 0 &&
112 Tensor::zero_point(kernel.output()) == 0);
113 }
114#endif // DIS_QUANT
115}
116
117void execute_kernel_CircleAdd(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
118{
119 kernels::TISOKernel kernel(cur_op, runtime_graph);
120
121 const auto *options = cur_op->builtin_options_as_AddOptions();
122
123 luci_interpreter::RuntimeShape input_shape1 =
124 kernels::getTensorRuntimeShape(kernel.input1(), runtime_graph);
125 luci_interpreter::RuntimeShape input_shape2 =
126 kernels::getTensorRuntimeShape(kernel.input2(), runtime_graph);
127
128 bool is_inplace = runtime_graph->is_inplace_op(cur_op);
129
130 // TODO remove code duplication, introduce func
131#ifndef DIS_DYN_SHAPES
133 kernels::getTensorRuntimeShape(kernel.output(), runtime_graph);
134 // Dynamic shape case
135 if (not is_inplace and not(input_shape1 == output_shape) and not(input_shape2 == output_shape))
136 {
137 int32_t num_dims;
138
139 if (input_shape1.flatSize() > input_shape2.flatSize())
140 {
141 output_shape = input_shape1;
142 num_dims = input_shape1.dimensionsCount();
143 }
144 else
145 {
146 output_shape = input_shape2;
147 num_dims = input_shape2.dimensionsCount();
148 }
149
150 luci_interpreter::RuntimeShape dynamic_shape(num_dims);
151 int32_t data_size = 1;
152 for (int i = 0; i < num_dims; ++i)
153 {
154 dynamic_shape.setDim(i, output_shape.dims(i));
155 data_size *= output_shape.dims(i);
156 }
157 data_size *= size(Tensor::element_type(kernel.output()));
158
159 runtime_graph->addDynamicShapeTensor(kernel.output(), std::move(dynamic_shape));
160
161 if (data_size == 0)
162 {
163 runtime_graph->resetTensorData(nullptr, kernel.output());
164 return;
165 }
166 auto new_output_data = new uint8_t[data_size];
167 runtime_graph->resetTensorData(new_output_data, kernel.output());
168 }
169#endif // DIS_DYN_SHAPES
170
171 const auto type = Tensor::element_type(kernel.input1());
172 switch (type)
173 {
174#ifndef DIS_FLOAT
175 case DataType::FLOAT32:
176 {
177 auto tiso_func = luci_interpreter_pal::Add<float>;
178 auto broadcast_tiso_func = luci_interpreter_pal::BroadcastAdd4DSlow<float>;
179 if (is_inplace)
180 {
181 kernels::evalTISOInplaceKernel<float>(tiso_func, broadcast_tiso_func, &kernel, options,
182 std::move(input_shape1), std::move(input_shape2),
183 std::move(output_shape));
184 }
185 else
186 {
187 kernels::TISOData kernel_data = kernel.readData();
188 kernels::evalTISOKernel<float>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
189 options, std::move(input_shape1), std::move(input_shape2),
190 std::move(output_shape));
191 }
192 }
193 break;
194#endif // DIS_FLOAT
195 case DataType::S64:
196 {
197 auto tiso_func = luci_interpreter_pal::Add<int64_t>;
198 auto broadcast_tiso_func = luci_interpreter_pal::BroadcastAdd4DSlow<int64_t>;
199 if (is_inplace)
200 {
201 kernels::evalTISOInplaceKernel<int64_t>(tiso_func, broadcast_tiso_func, &kernel, options,
202 std::move(input_shape1), std::move(input_shape2),
203 std::move(output_shape));
204 }
205 else
206 {
207 kernels::TISOData kernel_data = kernel.readData();
208 kernels::evalTISOKernel<int64_t>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
209 options, std::move(input_shape1), std::move(input_shape2),
210 std::move(output_shape));
211 }
212 }
213 break;
214 case DataType::S32:
215 {
216 auto tiso_func = luci_interpreter_pal::Add<int32_t>;
217 auto broadcast_tiso_func = luci_interpreter_pal::BroadcastAdd4DSlow<int32_t>;
218 if (is_inplace)
219 {
220 kernels::evalTISOInplaceKernel<int32_t>(tiso_func, broadcast_tiso_func, &kernel, options,
221 std::move(input_shape1), std::move(input_shape2),
222 std::move(output_shape));
223 }
224 else
225 {
226 kernels::TISOData kernel_data = kernel.readData();
227 kernels::evalTISOKernel<int32_t>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
228 options, std::move(input_shape1), std::move(input_shape2),
229 std::move(output_shape));
230 }
231 }
232 break;
233#ifndef DIS_QUANT
234 case DataType::S8:
235 case DataType::S16:
236 {
237 evalQuantized(kernel.input1(), kernel.input2(), kernel.output(), options, runtime_graph,
238 type);
239 }
240 break;
241#endif
242 default:
243 assert(false && "Unsupported type.");
244 }
245}
246
247} // namespace luci_interpreter
void addDynamicShapeTensor(const circle::Tensor *tensor, luci_interpreter::RuntimeShape &&shapes)
void resetTensorData(uint8_t *new_data, const circle::Tensor *tensor)
bool is_inplace_op(const circle::Operator *op)
int32_t dimensionsCount() const
Definition Tensor.h:106
int32_t dims(int i) const
Definition Tensor.h:108
void setDim(int i, int32_t val)
Definition Tensor.h:114
const circle::Tensor * output() const
Definition TISOKernel.h:62
const circle::Tensor * input2() const
Definition TISOKernel.h:61
const circle::Tensor * input1() const
Definition TISOKernel.h:60
#define LUCI_INTERPRETER_CHECK(cond)
Definition Utils.h:36
const luci_interpreter::RuntimeShape output_shape
type
Definition infer.py:18
DataType
"scalar" value type
Definition DataType.h:27
void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier, int *left_shift)
Definition Utils.cpp:193
void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max)
Definition Utils.cpp:119
luci_interpreter::RuntimeShape getTensorRuntimeShape(const circle::Tensor *circle_tensor, BaseRuntimeGraph *runtime_graph)
Definition Utils.cpp:29
void Add(const ArithmeticParams &params, const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
bool ProcessBroadcastShapes(const luci_interpreter::RuntimeShape &shape0, const luci_interpreter::RuntimeShape &shape1, luci_interpreter_pal::ArithmeticParams *params)
RuntimeGraph BaseRuntimeGraph
void execute_kernel_CircleAdd(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
Definition Add.cpp:117
void configure_kernel_CircleAdd(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
Definition Add.cpp:96
FusedActFunc luci_actfunc(const circle::ActivationFunctionType type)
int32_t size[5]
Definition Slice.cpp:35