ONE - On-device Neural Engine
Sub.cpp
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "kernels/Sub.h"
#include "kernels/Utils.h"

#include "PALSub.h"

#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>

#include <stdexcept>

namespace luci_interpreter
{
namespace kernels
{

Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams &params)
  : KernelWithParams<SubParams>({input1, input2}, {output}, params)
{
}

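// configure() runs once before execution: it checks that both inputs match the
// output's element type and resizes the output tensor to the broadcast of the two
// input shapes (e.g. shapes [2, 1, 4] and [3, 1] broadcast to [2, 3, 4]).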
void Sub::configure()
{
  LUCI_INTERPRETER_CHECK(!(input1()->element_type() != input2()->element_type()))
  LUCI_INTERPRETER_CHECK(!(input1()->element_type() != output()->element_type()))
  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
}

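// execute() dispatches on the element type of the first input: FLOAT32, S64, S32
// and quantized U8 are supported; any other type throws a runtime error.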
void Sub::execute() const
{
  switch (input1()->element_type())
  {
    case DataType::FLOAT32:
      evalFloat();
      break;
    case DataType::S64:
      evalInteger<int64_t>();
      break;
    case DataType::S32:
      evalInteger<int32_t>();
      break;
    case DataType::U8:
      evalQuantized();
      break;
    default:
      throw std::runtime_error("luci-intp Sub Unsupported type.");
  }
}

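// Float path: ProcessBroadcastShapes() decides whether the two input shapes need
// broadcasting. The broadcast case uses the TFLite reference BroadcastSubSlow kernel;
// the plain element-wise case uses the platform-optimized luci_interpreter_pal::Sub.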
void Sub::evalFloat() const
{
  tflite::ArithmeticParams params{};
  fillArithmeticActivationRange<float>(params, _params.activation);

  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
    getTensorShape(input1()), getTensorShape(input2()), &params);

  if (need_broadcast)
  {
    tflite::reference_ops::BroadcastSubSlow(
      params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
      getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
  }
  else
  {
    luci_interpreter_pal::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()),
                              getTensorShape(input2()), getTensorData<float>(input2()),
                              getTensorShape(output()), getTensorData<float>(output()));
  }
}

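// Integer path (S32/S64): same structure as the float path, but both the broadcast
// and the element-wise case go through the TFLite reference kernels.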
template <typename T> void Sub::evalInteger() const
{
  tflite::ArithmeticParams params{};
  fillArithmeticActivationRange<T>(params, _params.activation);

  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
    getTensorShape(input1()), getTensorShape(input2()), &params);

  if (need_broadcast)
  {
    tflite::reference_ops::BroadcastSubSlow(
      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
  }
  else
  {
    tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<T>(input1()),
                               getTensorShape(input2()), getTensorData<T>(input2()),
                               getTensorShape(output()), getTensorData<T>(output()));
  }
}

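// Quantized (U8) path. Real values are reconstructed as scale * (q - zero_point), so
// the subtraction is carried out on int32 in a common fixed-point domain:
//   - both inputs are rescaled relative to twice_max_input_scale = 2 * max(s1, s2),
//     which keeps each real input multiplier <= 0.5;
//   - a left shift of 20 bits preserves precision before the final requantization;
//   - the result is scaled back to the output scale via real_output_multiplier.
// quantizeMultiplierSmallerThanOneExp() turns each real multiplier into a 32-bit
// fixed-point multiplier plus a shift exponent, as expected by the TFLite reference
// kernel. Illustrative numbers (not from the source): with s1 = 0.5, s2 = 0.25 and an
// output scale of 1.0, twice_max_input_scale = 1.0, so the input multipliers become
// 0.5 and 0.25 and the output multiplier 1.0 / 2^20.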
void Sub::evalQuantized() const
{
  const auto input1_scale = static_cast<double>(input1()->scale());
  const auto input2_scale = static_cast<double>(input2()->scale());
  const auto output_scale = static_cast<double>(output()->scale());

  const int left_shift = 20;
  const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
  const double real_input1_multiplier = input1_scale / twice_max_input_scale;
  const double real_input2_multiplier = input2_scale / twice_max_input_scale;
  const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);

  int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
  int input1_shift{}, input2_shift{}, output_shift{};
  quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
  quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
  quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);

  int32_t activation_min{};
  int32_t activation_max{};
  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);

  tflite::ArithmeticParams params{};
  params.left_shift = left_shift;
  // The kernel expects inputs' zero points to be negated.
  params.input1_offset = -input1()->zero_point(); // Note the '-'.
  params.input1_multiplier = input1_multiplier;
  params.input1_shift = input1_shift;
  params.input2_offset = -input2()->zero_point(); // Note the '-'.
  params.input2_multiplier = input2_multiplier;
  params.input2_shift = input2_shift;
  params.output_offset = output()->zero_point();
  params.output_multiplier = output_multiplier;
  params.output_shift = output_shift;
  params.quantized_activation_min = activation_min;
  params.quantized_activation_max = activation_max;

  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
    getTensorShape(input1()), getTensorShape(input2()), &params);

  if (need_broadcast)
  {
    tflite::reference_ops::BroadcastQuantSubSlow(
      params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
      getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
  }
  else
  {
    tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
                               getTensorShape(input2()), getTensorData<uint8_t>(input2()),
                               getTensorShape(output()), getTensorData<uint8_t>(output()));
  }
}

} // namespace kernels
} // namespace luci_interpreter
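
For orientation, a minimal sketch of how this kernel is driven, assuming the caller
already owns valid Tensor objects from the runtime graph (tensor setup is elided, and
the helper below is hypothetical, not part of this file):

// Hypothetical driver -- illustrates only the configure()/execute() sequence.
#include "kernels/Sub.h"

void run_sub(const luci_interpreter::Tensor *a, const luci_interpreter::Tensor *b,
             luci_interpreter::Tensor *out)
{
  luci_interpreter::SubParams params{};
  params.activation = luci_interpreter::Activation::NONE; // assumption: no fused activation

  luci_interpreter::kernels::Sub kernel(a, b, out, params);
  kernel.configure(); // validates element types, resizes `out` to the broadcast shape
  kernel.execute();   // selects the float / integer / quantized path by element type
}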