ONE - On-device Neural Engine
Add.cpp
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "Add.h"
#include "Common.h"

#include "QuantizationHelpers.h"
#include "mir/Tensor.h"
#include "mir/ShapeRange.h"

#include <algorithm> // std::min, std::max
#include <cassert>   // assert
#include <cmath>
#include <limits>    // std::numeric_limits
#include <stdexcept> // std::runtime_error

namespace mir_interpreter
{

using namespace mir;

// Generic elementwise addition: broadcast both inputs to the result shape,
// then add index by index.
template <typename T> struct AddImpl
{
  static void run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res);
};

template <typename T>
void AddImpl<T>::run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
{
  TensorVariant broadcasted_lhs(lhs, res.getShape());
  TensorVariant broadcasted_rhs(rhs, res.getShape());
  Tensor<T> lhs_accessor(broadcasted_lhs);
  Tensor<T> rhs_accessor(broadcasted_rhs);
  Tensor<T> res_accessor(res);

  for (const auto &index : ShapeRange(res.getShape()))
  {
    res_accessor.at(index) = lhs_accessor.at(index) + rhs_accessor.at(index);
  }
}
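
// The uint8_t specialization below follows the TensorFlow Lite quantized-add
// recipe: operands are shifted into a shared fixed-point domain, scaled by
// precomputed integer multipliers, summed, and rescaled to the output's
// quantization parameters.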
template <> struct AddImpl<uint8_t>
{
  static void run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res);
};

void AddImpl<uint8_t>::run(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
{
  const auto &lhs_type = lhs.getType();
  const auto &rhs_type = rhs.getType();
  const auto &res_type = res.getType();

  assert(lhs_type.isQuantized());
  assert(rhs_type.isQuantized());
  assert(res_type.isQuantized());

  // Negated zero points, so that adding the offset recovers (q - zero_point).
  int32_t lhs_offset = -lhs_type.getQuantization().getZeroPoint();
  int32_t rhs_offset = -rhs_type.getQuantization().getZeroPoint();
  int32_t output_offset = res_type.getQuantization().getZeroPoint();

  double lhs_scale = lhs_type.getQuantization().getScale();
  double rhs_scale = rhs_type.getQuantization().getScale();
  double output_scale = res_type.getQuantization().getScale();

  // Normalize both input scales by twice the larger one, which keeps the real
  // multipliers strictly below 1 so they can be encoded as fixed-point values.
  // The extra left shift of 20 preserves precision in the 32-bit arithmetic.
  int left_shift = 20;
  const double twice_max_input_scale = 2 * std::max(lhs_scale, rhs_scale);
  const double real_lhs_multiplier = lhs_scale / twice_max_input_scale;
  const double real_rhs_multiplier = rhs_scale / twice_max_input_scale;
  const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);

  int32_t lhs_multiplier = 0;
  int32_t rhs_multiplier = 0;
  int32_t output_multiplier = 0;
  int lhs_shift = 0;
  int rhs_shift = 0;
  int output_shift = 0;

  QuantizeMultiplierSmallerThanOneExp(real_lhs_multiplier, &lhs_multiplier, &lhs_shift);
  QuantizeMultiplierSmallerThanOneExp(real_rhs_multiplier, &rhs_multiplier, &rhs_shift);
  QuantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);

  TensorVariant broadcasted_lhs(lhs, res_type.getShape());
  TensorVariant broadcasted_rhs(rhs, res_type.getShape());

  Tensor<uint8_t> lhs_accessor(broadcasted_lhs);
  Tensor<uint8_t> rhs_accessor(broadcasted_rhs);
  Tensor<uint8_t> res_accessor(res);

  int32_t output_min = std::numeric_limits<uint8_t>::min();
  int32_t output_max = std::numeric_limits<uint8_t>::max();

  for (const auto &index : ShapeRange(res_type.getShape()))
  {
    // Center on the zero points, scale into the shared fixed-point domain,
    // add, rescale to the output scale, and clamp to the uint8 range.
    const int32_t lhs_val = lhs_accessor.at(index) + lhs_offset;
    const int32_t rhs_val = rhs_accessor.at(index) + rhs_offset;
    const int32_t shifted_lhs_val = lhs_val * (1 << left_shift);
    const int32_t shifted_rhs_val = rhs_val * (1 << left_shift);
    const int32_t scaled_lhs_val =
      MultiplyByQuantizedMultiplierSmallerThanOneExp(shifted_lhs_val, lhs_multiplier, lhs_shift);
    const int32_t scaled_rhs_val =
      MultiplyByQuantizedMultiplierSmallerThanOneExp(shifted_rhs_val, rhs_multiplier, rhs_shift);
    const int32_t raw_sum = scaled_lhs_val + scaled_rhs_val;
    const int32_t raw_output =
      MultiplyByQuantizedMultiplierSmallerThanOneExp(raw_sum, output_multiplier, output_shift) +
      output_offset;
    const int32_t clamped_output = std::min(output_max, std::max(output_min, raw_output));
    res_accessor.at(index) = static_cast<uint8_t>(clamped_output);
  }
}

// Entry point: reject mismatched input element types, then dispatch to the
// AddImpl specialization for the element type.
void Add(const TensorVariant &lhs, const TensorVariant &rhs, TensorVariant &res)
{
  if (lhs.getElementType() != rhs.getElementType())
  {
    throw std::runtime_error{"Add with different input types is unsupported"};
  }
  dispatch<AddImpl>(res.getElementType(), lhs, rhs, res);
}

} // namespace mir_interpreter
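
The quantized path follows the standard TensorFlow Lite derivation; the sketch
below restates it with the variable names from the code and is not part of the
source. In affine quantization a stored integer q represents the real value
r = S * (q - Z) for scale S and zero point Z. Requiring the real values to add,

\[
S_{out}(q_{out} - Z_{out}) = S_{lhs}(q_{lhs} - Z_{lhs}) + S_{rhs}(q_{rhs} - Z_{rhs}),
\]

and normalizing by twice the larger input scale, m = 2 \max(S_{lhs}, S_{rhs}),
gives the expression the inner loop evaluates in integer arithmetic:

\[
q_{out} = Z_{out} + \frac{m}{2^{20} S_{out}}
  \left( \frac{S_{lhs}}{m} \, 2^{20} (q_{lhs} - Z_{lhs})
       + \frac{S_{rhs}}{m} \, 2^{20} (q_{rhs} - Z_{rhs}) \right).
\]

The input factors S_{lhs}/m and S_{rhs}/m are at most 1/2, and the output
factor m / (2^{20} S_{out}) stays below one for any realistic combination of
scales, so each can be encoded by QuantizeMultiplierSmallerThanOneExp as a
32-bit fixed-point multiplier plus a right shift. The 2^{20} factor
(left_shift in the code) preserves precision through the intermediate
arithmetic.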
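
QuantizeMultiplierSmallerThanOneExp and MultiplyByQuantizedMultiplierSmallerThanOneExp
come from QuantizationHelpers.h and share their names with the TensorFlow Lite
reference helpers. A minimal self-contained sketch of the assumed semantics
(illustration only, not the project's implementation; it omits TFLite's
saturation handling and rounds negative values slightly differently):

#include <cmath>
#include <cstdint>

// Encode 0 < m < 1 as a Q31 fixed-point multiplier and a non-positive
// exponent, so that m ~= quantized_multiplier * 2^(left_shift - 31).
void QuantizeMultiplierSmallerThanOneExpSketch(double m, int32_t *quantized_multiplier,
                                               int *left_shift)
{
  int exponent = 0;
  const double fraction = std::frexp(m, &exponent); // m = fraction * 2^exponent
  auto fixed = static_cast<int64_t>(std::round(fraction * (1ll << 31)));
  if (fixed == (1ll << 31)) // rounding overflowed the Q31 mantissa
  {
    fixed /= 2;
    ++exponent;
  }
  *quantized_multiplier = static_cast<int32_t>(fixed);
  *left_shift = exponent; // <= 0 because m < 1
}

// Multiply x by the encoded multiplier: rounding high product in Q31,
// then a rounding right shift by -left_shift.
int32_t MultiplyByQuantizedMultiplierSmallerThanOneExpSketch(int32_t x,
                                                             int32_t quantized_multiplier,
                                                             int left_shift)
{
  const int64_t product = static_cast<int64_t>(x) * quantized_multiplier;
  const auto high = static_cast<int32_t>((product + (1ll << 30)) >> 31);
  const int right_shift = -left_shift; // non-negative for multipliers < 1
  if (right_shift == 0)
    return high;
  return (high + (1 << (right_shift - 1))) >> right_shift;
}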