#ifndef ONERT_MICRO_EXECUTE_PAL_ARITHMETIC_OP_COMMON_H
#define ONERT_MICRO_EXECUTE_PAL_ARITHMETIC_OP_COMMON_H
template <typename T> struct AddFn
{
  T operator()(T lhs, T rhs) { return lhs + rhs; }
};
template <typename T> struct SubFn
{
  T operator()(T lhs, T rhs) { return lhs - rhs; }
};
template <typename T> struct MulFn
{
  T operator()(T lhs, T rhs) { return lhs * rhs; }
};
template <typename T> struct DivFn
{
  T operator()(T lhs, T rhs) { return lhs / rhs; }
};
template <typename T> struct SquaredDifferenceFn
{
  T operator()(T lhs, T rhs) { return (lhs - rhs) * (lhs - rhs); }
};
template <typename T, typename Fn>
OMStatus ArithmeticOp(const core::BinaryArithmeticBroadcastParams &params, const int flat_size,
                      const T *input1_data, const T *input2_data, T *output_data)
{
  T activation_min, activation_max;
  getActivationParams(params, &activation_min, &activation_max);

  Fn func;
  for (int i = 0; i < flat_size; ++i)
    output_data[i] =
      std::min(std::max(func(input1_data[i], input2_data[i]), activation_min), activation_max);

  return Ok;
}
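// Usage sketch for ArithmeticOp with one of the functors above. The float_activation_min/max
// field names on core::BinaryArithmeticBroadcastParams are an assumption here (they mirror the
// TFLite-style activation range that getActivationParams reads); the rest follows the code above.
//
//   core::BinaryArithmeticBroadcastParams params{};
//   params.float_activation_min = 0.0f;                              // fused ReLU floor
//   params.float_activation_max = std::numeric_limits<float>::max(); // no upper clamp
//
//   float in1[4] = {1.0f, -2.0f, 3.0f, -4.0f};
//   float in2[4] = {0.5f, 0.5f, 0.5f, 0.5f};
//   float out[4] = {};
//   ArithmeticOp<float, AddFn<float>>(params, /*flat_size=*/4, in1, in2, out);
//   // out is {1.5f, 0.0f, 3.5f, 0.0f}: negative sums are clamped to the activation floor.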
template <typename T, typename Fn>
OMStatus QuantizedArithmeticOp(const core::BinaryArithmeticBroadcastParams &params,
                               const int flat_size,
                               const onert_micro::core::QuantizationParams &input1_qparams,
                               const T *input1_data,
                               const onert_micro::core::QuantizationParams &input2_qparams,
                               const T *input2_data,
                               const onert_micro::core::QuantizationParams &output_qparams,
                               T *output_data)
{
  float activation_min, activation_max;
  getActivationParams(params, &activation_min, &activation_max);

  Fn func;
  for (int i = 0; i < flat_size; ++i)
  {
    // Dequantize both inputs to float
    float input1 = static_cast<float>(
      (input1_data[i] - static_cast<T>(input1_qparams.zero_point)) * input1_qparams.scale);
    float input2 = static_cast<float>(
      (input2_data[i] - static_cast<T>(input2_qparams.zero_point)) * input2_qparams.scale);
    float result = std::min(std::max(func(input1, input2), activation_min), activation_max);

    // Quantize back to the output scale / zero-point and clamp to the type range
    result = result / output_qparams.scale + output_qparams.zero_point;
    result = std::max<float>(std::numeric_limits<T>::min(), result);
    result = std::min<float>(std::numeric_limits<T>::max(), result);
    output_data[i] = static_cast<T>(result);
  }

  return Ok;
}
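// Worked example of the per-element flow above, with illustrative numbers and an Add functor:
// input1_qparams = {scale = 0.5, zero_point = 0}, input2_qparams = {scale = 0.25, zero_point = 2},
// output_qparams = {scale = 0.5, zero_point = 0}, T = int8_t, q1 = 6, q2 = 10:
//   input1 = (6 - 0)  * 0.5  = 3.0
//   input2 = (10 - 2) * 0.25 = 2.0
//   result = 3.0 + 2.0 = 5.0   ->  5.0 / 0.5 + 0 = 10.0  ->  output_data[i] = 10
// The std::max / std::min pair keeps the requantized value inside int8_t's representable range.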
template <typename T>
void ElementWise(const uint32_t size, const core::ArithmeticQuantParams &params,
                 const T *input1_data, const T *input2_data, T *output_data,
                 T (*binary_func)(T, T, const core::ArithmeticQuantParams &))
{
  for (int i = 0; i < size; ++i)
  {
    output_data[i] = binary_func(input1_data[i], input2_data[i], params);
  }
}
template <typename T, typename Fn>
void ArithmeticOpScalar(const core::BinaryArithmeticBroadcastParams &params, const int flat_size,
                        const T *input_data, const T scalar_value, T *output_data)
{
  T activation_min, activation_max;
  getActivationParams(params, &activation_min, &activation_max);

  Fn func;
  for (int i = 0; i < flat_size; ++i)
    output_data[i] =
      std::min(std::max(func(input_data[i], scalar_value), activation_min), activation_max);
}
template <typename T, typename Fn>
OMStatus BroadcastArithmeticOp4DSlow(const core::BinaryArithmeticBroadcastParams &params,
                                     const core::OMRuntimeShape &input1_shape, const T *input1_data,
                                     const core::OMRuntimeShape &input2_shape, const T *input2_data,
                                     const core::OMRuntimeShape &output_shape, T *output_data)
{
  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
  const core::OMRuntimeShape extended_output_shape =
    core::OMRuntimeShape::extendedShape(4, output_shape);

  T activation_min, activation_max;
  getActivationParams(params, &activation_min, &activation_max);

  // Walk the 4D output in row-major order; subscriptToIndex maps each output coordinate back
  // into the (possibly broadcast) inputs via their NdArrayDescs.
  Fn func;
  for (int b = 0; b < extended_output_shape.dims(0); ++b)
  {
    for (int y = 0; y < extended_output_shape.dims(1); ++y)
    {
      for (int x = 0; x < extended_output_shape.dims(2); ++x)
      {
        for (int c = 0; c < extended_output_shape.dims(3); ++c)
        {
          const int output_data_offset =
            ((b * extended_output_shape.dims(1) + y) * extended_output_shape.dims(2) + x) *
              extended_output_shape.dims(3) +
            c;

          output_data[output_data_offset] =
            std::min(std::max(func(input1_data[subscriptToIndex(desc1, b, y, x, c)],
                                   input2_data[subscriptToIndex(desc2, b, y, x, c)]),
                              activation_min),
                     activation_max);
        }
      }
    }
  }

  return Ok;
}
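// Offset math for the loops above: with an extended output shape of [2, 3, 4, 5]
// (dims(1) = 3, dims(2) = 4, dims(3) = 5), the element at (b, y, x, c) = (1, 2, 3, 4) lands at
//   ((1 * 3 + 2) * 4 + 3) * 5 + 4 = 23 * 5 + 4 = 119,
// the last slot of the 2 * 3 * 4 * 5 = 120-element buffer, i.e. plain row-major ordering, while
// subscriptToIndex maps the same (b, y, x, c) into each input according to its broadcast
// NdArrayDesc.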
template <typename T, typename Fn>
OMStatus QuantizedBroadcastArithmeticOp4DSlow(
  const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape,
  const onert_micro::core::QuantizationParams &input1_qparams, const T *input1_data,
  const core::OMRuntimeShape &input2_shape,
  const onert_micro::core::QuantizationParams &input2_qparams, const T *input2_data,
  const core::OMRuntimeShape &output_shape,
  const onert_micro::core::QuantizationParams &output_qparams, T *output_data)
{
  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
  const core::OMRuntimeShape extended_output_shape =
    core::OMRuntimeShape::extendedShape(4, output_shape);

  float activation_min, activation_max;
  getActivationParams(params, &activation_min, &activation_max);

  Fn func;
  for (int b = 0; b < extended_output_shape.dims(0); ++b)
  {
    for (int y = 0; y < extended_output_shape.dims(1); ++y)
    {
      for (int x = 0; x < extended_output_shape.dims(2); ++x)
      {
        for (int c = 0; c < extended_output_shape.dims(3); ++c)
        {
          // Dequantize both (broadcast-indexed) inputs to float
          float input1 = static_cast<float>((input1_data[subscriptToIndex(desc1, b, y, x, c)] -
                                             static_cast<T>(input1_qparams.zero_point)) *
                                            input1_qparams.scale);
          float input2 = static_cast<float>((input2_data[subscriptToIndex(desc2, b, y, x, c)] -
                                             static_cast<T>(input2_qparams.zero_point)) *
                                            input2_qparams.scale);
          float result = std::min(std::max(func(input1, input2), activation_min), activation_max);

          // Quantize back to the output scale / zero-point and clamp to the type range
          result = result / output_qparams.scale + output_qparams.zero_point;
          result = std::max<float>(std::numeric_limits<T>::min(), result);
          result = std::min<float>(std::numeric_limits<T>::max(), result);

          const int output_data_offset =
            ((b * extended_output_shape.dims(1) + y) * extended_output_shape.dims(2) + x) *
              extended_output_shape.dims(3) +
            c;
          output_data[output_data_offset] = static_cast<T>(result);
        }
      }
    }
  }

  return Ok;
}
template <typename T>
void BroadcastInput1(int size, const core::ArithmeticQuantParams &params, const T *input1_data,
                     const T *input2_data, T *output_data,
                     T (*binary_func)(T, T, const core::ArithmeticQuantParams &))
{
  for (int i = 0; i < size; ++i)
  {
    output_data[i] = binary_func(input1_data[0], input2_data[i], params);
  }
}
template <typename T>
void BroadcastInput2(int size, const core::ArithmeticQuantParams &params, const T *input1_data,
                     const T *input2_data, T *output_data,
                     T (*binary_func)(T, T, const core::ArithmeticQuantParams &))
{
  for (int i = 0; i < size; ++i)
  {
    output_data[i] = binary_func(input1_data[i], input2_data[0], params);
  }
}
template <typename T>
void BroadcastRecursiveDimensions(const core::ArithmeticQuantParams &params, int dimension,
                                  size_t *input1_offset_p, size_t *input2_offset_p,
                                  size_t *output_offset, size_t *compressed_input1_stride,
                                  size_t *compressed_input2_stride,
                                  size_t *compressed_output_shape, const T *input1_data,
                                  const T *input2_data, T *output_data,
                                  T (*binary_func)(T, T, const core::ArithmeticQuantParams &))
{
  if (dimension > 0)
  {
    // Recurse into the next inner dimension, advancing the input offsets by their strides.
    for (size_t c = 0; c < compressed_output_shape[dimension]; ++c)
    {
      size_t input1_offset_c = *input1_offset_p;
      size_t input2_offset_c = *input2_offset_p;
      BroadcastRecursiveDimensions(params, dimension - 1, &input1_offset_c, &input2_offset_c,
                                   output_offset, compressed_input1_stride,
                                   compressed_input2_stride, compressed_output_shape, input1_data,
                                   input2_data, output_data, binary_func);
      *input1_offset_p += compressed_input1_stride[dimension];
      *input2_offset_p += compressed_input2_stride[dimension];
    }
  }
  else
  {
    assert(dimension == 0);
    // A stride of zero means that input is broadcast along the innermost (compressed) dimension.
    bool input1_is_broadcast = compressed_input1_stride[dimension] == 0;
    bool input2_is_broadcast = compressed_input2_stride[dimension] == 0;
    assert(!(input1_is_broadcast && input2_is_broadcast));
    const T *input1_data_ptr = input1_data + *input1_offset_p;
    const T *input2_data_ptr = input2_data + *input2_offset_p;
    T *output_data_ptr = output_data + *output_offset;
    if (input1_is_broadcast)
    {
      // input1 is broadcast.
      BroadcastInput1<T>(compressed_output_shape[dimension], params, input1_data_ptr,
                         input2_data_ptr, output_data_ptr, binary_func);
      *input2_offset_p += compressed_output_shape[dimension];
    }
    else if (input2_is_broadcast)
    {
      // input2 is broadcast.
      BroadcastInput2<T>(compressed_output_shape[dimension], params, input1_data_ptr,
                         input2_data_ptr, output_data_ptr, binary_func);
      *input1_offset_p += compressed_output_shape[dimension];
    }
    else
    {
      // No broadcast in this dimension: plain element-wise loop.
      ElementWise<T>(compressed_output_shape[dimension], params, input1_data_ptr, input2_data_ptr,
                     output_data_ptr, binary_func);
      *input1_offset_p += compressed_output_shape[dimension];
      *input2_offset_p += compressed_output_shape[dimension];
    }
    *output_offset += compressed_output_shape[dimension];
  }
}
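// Illustration of the dimension == 0 branch above: with compressed_output_shape[0] = 3 and
// compressed_input1_stride[0] = 0 (input1 is the broadcast side), BroadcastInput1 produces
//   output[0] = binary_func(input1[off1], input2[off2 + 0], params)
//   output[1] = binary_func(input1[off1], input2[off2 + 1], params)
//   output[2] = binary_func(input1[off1], input2[off2 + 2], params)
// and only *input2_offset_p and *output_offset advance by 3, since input1 contributed one value.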
template <typename T>
void BroadcastBinaryFunction6DSlow(const core::ArithmeticQuantParams &params,
                                   const core::OMRuntimeShape &input1_shape, const T *input1_data,
                                   const core::OMRuntimeShape &input2_shape, const T *input2_data,
                                   const core::OMRuntimeShape &output_shape, T *output_data,
                                   T (*binary_func)(T, T, const core::ArithmeticQuantParams &))
{
  constexpr int kMaxBroadcastDim = 6;

  size_t compressed_input1_stride[kMaxBroadcastDim];
  size_t compressed_input2_stride[kMaxBroadcastDim];
  size_t compressed_output_shape[kMaxBroadcastDim];
  bool broadcastable_shape = ReduceDimensionsForBroadcast<kMaxBroadcastDim>(
    input1_shape, input2_shape, compressed_input1_stride, compressed_input2_stride,
    compressed_output_shape);
  // Skip broadcasting for degenerate shapes.
  if (!broadcastable_shape)
    return;

  size_t input1_offset = 0;
  size_t input2_offset = 0;
  size_t output_offset = 0;
  BroadcastRecursiveDimensions(params, kMaxBroadcastDim - 1, &input1_offset, &input2_offset,
                               &output_offset, compressed_input1_stride, compressed_input2_stride,
                               compressed_output_shape, input1_data, input2_data, output_data,
                               binary_func);
}
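// Sketch of a binary_func matching the T (*)(T, T, const core::ArithmeticQuantParams &) signature
// expected above: a saturating int8 add that ignores the quantization params (a real quantized
// kernel would instead fold the quantization parameters carried in params into the arithmetic).
//
//   int8_t SaturatingAdd(int8_t lhs, int8_t rhs, const core::ArithmeticQuantParams &)
//   {
//     int32_t sum = static_cast<int32_t>(lhs) + static_cast<int32_t>(rhs);
//     sum = std::max<int32_t>(std::numeric_limits<int8_t>::min(), sum);
//     sum = std::min<int32_t>(std::numeric_limits<int8_t>::max(), sum);
//     return static_cast<int8_t>(sum);
//   }
//
//   BroadcastBinaryFunction6DSlow<int8_t>(quant_params, input1_shape, input1_data, input2_shape,
//                                         input2_data, output_shape, output_data, SaturatingAdd);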