ONE - On-device Neural Engine
Loading...
Searching...
No Matches
Mul.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#include "Builders.h"
19#include "kernels/Utils.h"
20
21#include "kernels/BinaryOpCommon.h"
22
23#include "PALMul.h"
24
25namespace luci_interpreter
26{
27
28namespace
29{
30
31#ifndef DIS_QUANT
32void evalQuantized(const circle::Tensor *input1, const circle::Tensor *input2,
33 const circle::Tensor *output, const circle::MulOptions *options,
34 BaseRuntimeGraph *runtime_graph, DataType type)
35{
36 assert(type == DataType::S16 or type == DataType::S8 && "Wrong Type");
37
40 kernels::getTensorRuntimeShape(input1, runtime_graph);
42 kernels::getTensorRuntimeShape(input2, runtime_graph);
43
44 const bool need_broadcast =
45 luci_interpreter_pal::ProcessBroadcastShapes(input_shape1, input_shape2, &params);
46
47 assert(need_broadcast == false && "Broadcast for INT8 and INT16 not supported now");
48
49 params.input1_offset = -Tensor::zero_point(input1);
50 params.input2_offset = -Tensor::zero_point(input2);
51 params.output_offset = Tensor::zero_point(output);
52
53 const auto input1_scale = static_cast<double>(Tensor::scale(input1));
54 const auto input2_scale = static_cast<double>(Tensor::scale(input2));
55 const auto output_scale = static_cast<double>(Tensor::scale(output));
56
57 double real_multiplier = input1_scale * input2_scale / output_scale;
58
59 kernels::quantizeMultiplier(real_multiplier, &params.output_multiplier, &params.output_shift);
60
62 output, &params.quantized_activation_min,
63 &params.quantized_activation_max);
64 if (type == DataType::S8)
65 {
66 luci_interpreter_pal::Mul(
67 params, input_shape1.flatSize(),
68 kernels::getTensorData<int8_t>(runtime_graph->getDataByTensor(input1)),
69 kernels::getTensorData<int8_t>(runtime_graph->getDataByTensor(input2)),
70 kernels::getTensorData<int8_t>(runtime_graph->getDataByTensor(output)));
71 }
72 else
73 {
74 luci_interpreter_pal::Mul(
75 params, input_shape1.flatSize(),
76 kernels::getTensorData<int16_t>(runtime_graph->getDataByTensor(input1)),
77 kernels::getTensorData<int16_t>(runtime_graph->getDataByTensor(input2)),
78 kernels::getTensorData<int16_t>(runtime_graph->getDataByTensor(output)));
79 }
80}
81#endif // DIS_QUANT
82
83} // namespace
84
85void configure_kernel_CircleMul(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
86{
87 kernels::TISOKernel kernel(cur_op, runtime_graph);
88
89 LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
90 Tensor::element_type(kernel.input2()));
91 LUCI_INTERPRETER_CHECK(Tensor::element_type(kernel.input1()) ==
92 Tensor::element_type(kernel.input2()));
93#ifndef DIS_QUANT
94 if (Tensor::element_type(kernel.input1()) == DataType::S16)
95 {
96 LUCI_INTERPRETER_CHECK(Tensor::zero_points(kernel.input1()).size() == 1 &&
97 Tensor::zero_points(kernel.input2()).size() == 1);
98 LUCI_INTERPRETER_CHECK(Tensor::zero_point(kernel.input1()) == 0 &&
99 Tensor::zero_point(kernel.input2()) == 0 &&
100 Tensor::zero_point(kernel.output()) == 0);
101 }
102#endif // DIS_QUANT
103}
104
105void execute_kernel_CircleMul(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
106{
107 kernels::TISOKernel kernel(cur_op, runtime_graph);
108
109 const auto *options = cur_op->builtin_options_as_MulOptions();
110
111 luci_interpreter::RuntimeShape input_shape1 =
112 kernels::getTensorRuntimeShape(kernel.input1(), runtime_graph);
113 luci_interpreter::RuntimeShape input_shape2 =
114 kernels::getTensorRuntimeShape(kernel.input2(), runtime_graph);
115
117 kernels::getTensorRuntimeShape(kernel.output(), runtime_graph);
118
119 bool is_inplace = runtime_graph->is_inplace_op(cur_op);
120 const auto type = Tensor::element_type(kernel.input1());
121 switch (type)
122 {
123#ifndef DIS_FLOAT
124 case DataType::FLOAT32:
125 {
126 auto tiso_func = luci_interpreter_pal::Mul<float>;
127 auto broadcast_tiso_func = luci_interpreter_pal::BroadcastMul4DSlow<float>;
128 if (is_inplace)
129 {
130 kernels::evalTISOInplaceKernel<float>(tiso_func, broadcast_tiso_func, &kernel, options,
131 std::move(input_shape1), std::move(input_shape2),
132 std::move(output_shape));
133 }
134 else
135 {
136 kernels::TISOData kernel_data = kernel.readData();
137 kernels::evalTISOKernel<float>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
138 options, std::move(input_shape1), std::move(input_shape2),
139 std::move(output_shape));
140 }
141 }
142 break;
143#endif // DIS_FLOAT
144 case DataType::S64:
145 {
146 auto tiso_func = luci_interpreter_pal::Mul<int64_t>;
147 auto broadcast_tiso_func = luci_interpreter_pal::BroadcastMul4DSlow<int64_t>;
148 if (is_inplace)
149 {
150 kernels::evalTISOInplaceKernel<int64_t>(tiso_func, broadcast_tiso_func, &kernel, options,
151 std::move(input_shape1), std::move(input_shape2),
152 std::move(output_shape));
153 }
154 else
155 {
156 kernels::TISOData kernel_data = kernel.readData();
157 kernels::evalTISOKernel<int64_t>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
158 options, std::move(input_shape1), std::move(input_shape2),
159 std::move(output_shape));
160 }
161 }
162 break;
163 case DataType::S32:
164 {
165 auto tiso_func = luci_interpreter_pal::Mul<int32_t>;
166 auto broadcast_tiso_func = luci_interpreter_pal::BroadcastMul4DSlow<int32_t>;
167 if (is_inplace)
168 {
169 kernels::evalTISOInplaceKernel<int32_t>(tiso_func, broadcast_tiso_func, &kernel, options,
170 std::move(input_shape1), std::move(input_shape2),
171 std::move(output_shape));
172 }
173 else
174 {
175 kernels::TISOData kernel_data = kernel.readData();
176 kernels::evalTISOKernel<int32_t>(tiso_func, broadcast_tiso_func, &kernel, &kernel_data,
177 options, std::move(input_shape1), std::move(input_shape2),
178 std::move(output_shape));
179 }
180 }
181 break;
182#ifndef DIS_QUANT
183 case DataType::S8:
184 case DataType::S16:
185 {
186 evalQuantized(kernel.input1(), kernel.input2(), kernel.output(), options, runtime_graph,
187 type);
188 }
189 break;
190#endif // DIS_QUANT
191 default:
192 assert(false && "Unsupported type.");
193 }
194}
195
196} // namespace luci_interpreter
bool is_inplace_op(const circle::Operator *op)
const circle::Tensor * output() const
Definition TISOKernel.h:62
const circle::Tensor * input2() const
Definition TISOKernel.h:61
const circle::Tensor * input1() const
Definition TISOKernel.h:60
#define LUCI_INTERPRETER_CHECK(cond)
Definition Utils.h:36
const luci_interpreter::RuntimeShape output_shape
DataType
"scalar" value type
Definition DataType.h:27
void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max)
Definition Utils.cpp:119
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
Definition Utils.cpp:157
luci_interpreter::RuntimeShape getTensorRuntimeShape(const circle::Tensor *circle_tensor, BaseRuntimeGraph *runtime_graph)
Definition Utils.cpp:29
bool ProcessBroadcastShapes(const luci_interpreter::RuntimeShape &shape0, const luci_interpreter::RuntimeShape &shape1, luci_interpreter_pal::ArithmeticParams *params)
RuntimeGraph BaseRuntimeGraph
void configure_kernel_CircleMul(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
Definition Mul.cpp:85
void execute_kernel_CircleMul(const circle::Operator *cur_op, BaseRuntimeGraph *runtime_graph)
Definition Mul.cpp:105
FusedActFunc luci_actfunc(const circle::ActivationFunctionType type)