ONE - On-device Neural Engine
Loading...
Searching...
No Matches
PALBinaryOpCommon.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef ONERT_MICRO_EXECUTE_PAL_BINARYOP_COMMON_H
19#define ONERT_MICRO_EXECUTE_PAL_BINARYOP_COMMON_H
20
#include "OMStatus.h"
#include "core/OMRuntimeShape.h"
#include "PALUtils.h"
// NOTE(review): original line 24 was lost in extraction; ProcessBroadcastShapes.h
// is the PAL header declaring NdArrayDesc / NdArrayDescsForElementwiseBroadcast /
// subscriptToIndex used below — confirm against the upstream file.
#include "ProcessBroadcastShapes.h"

#include <cmath>
26
27namespace onert_micro
28{
29namespace execute
30{
31namespace pal
32{
33
// Functor for the FloorDiv op: floor(lhs / rhs), i.e. the quotient rounded
// toward negative infinity (unlike C++ truncating division).
// Enabled only for floating-point element types.
// NOTE(review): the struct-name line was dropped by the doc extractor;
// restored as FloorDivFn to match the Fn-suffix convention used by
// MaximumFn/MinimumFn in this file — confirm against the upstream header.
template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
struct FloorDivFn
{
  T operator()(T lhs, T rhs)
  {
    // Divide in double precision before flooring to reduce rounding error
    // for float inputs.
    return std::floor(static_cast<double>(lhs) / static_cast<double>(rhs));
  }
};
// Functor for the FloorMod op: remainder whose sign follows the divisor
// (Python-style modulo), built from the truncating std::fmod.
// Enabled only for floating-point element types.
// NOTE(review): the struct-name line was dropped by the doc extractor;
// restored as FloorModFn by analogy with FloorDiv — confirm upstream.
template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
struct FloorModFn
{
  T operator()(T lhs, T rhs)
  {
    // std::fmod truncates toward zero; when the result is nonzero and its
    // sign differs from rhs, shift by rhs to get the floor-style remainder.
    T trunc_mod = std::fmod(lhs, rhs);
    return (trunc_mod != 0) && ((rhs < 0) != (trunc_mod < 0)) ? (trunc_mod + rhs) : trunc_mod;
  }
};
// Functor for the element-wise Maximum op: yields the larger operand.
template <typename T> struct MaximumFn
{
  // Same contract as std::max: rhs is chosen only when lhs < rhs,
  // so equal operands yield lhs.
  T operator()(T lhs, T rhs) { return lhs < rhs ? rhs : lhs; }
};
// Functor for the element-wise Minimum op: yields the smaller operand.
template <typename T> struct MinimumFn
{
  // Same contract as std::min: rhs is chosen only when rhs < lhs,
  // so equal operands yield lhs.
  T operator()(T lhs, T rhs) { return rhs < lhs ? rhs : lhs; }
};
59
60// TODO: check if there real activation value
61template <typename T, typename Fn>
62inline OMStatus BinaryOp(const int flat_size, const T *input1_data, const T *input2_data,
63 T *output_data)
64{
65 Fn func;
66 for (int i = 0; i < flat_size; ++i)
67 {
68 output_data[i] = func(input1_data[i], input2_data[i]);
69 }
70 return Ok;
71}
72
73template <typename T, typename Fn>
74inline OMStatus
75BroadcastBinaryOp4DSlow(const core::OMRuntimeShape &input1_shape, const float *input1_data,
76 const core::OMRuntimeShape &input2_shape, const float *input2_data,
77 const core::OMRuntimeShape &output_shape, float *output_data)
78{
81 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
82
83 const core::OMRuntimeShape extended_output_shape =
85
86 // In Tensorflow, the dimensions are canonically named (batch_number, row,
87 // col, channel), with extents (batches, height, width, depth), with the
88 // trailing dimension changing most rapidly (channels has the smallest stride,
89 // typically 1 element).
90 //
91 // In generated C code, we store arrays with the dimensions reversed. The
92 // first dimension has smallest stride.
93 //
94 // We name our variables by their Tensorflow convention, but generate C code
95 // nesting loops such that the innermost loop has the smallest stride for the
96 // best cache behavior.
97
98 Fn func;
99 for (int b = 0; b < extended_output_shape.dims(0); ++b)
100 {
101 for (int y = 0; y < extended_output_shape.dims(1); ++y)
102 {
103 for (int x = 0; x < extended_output_shape.dims(2); ++x)
104 {
105 for (int c = 0; c < extended_output_shape.dims(3); ++c)
106 {
107 const int output_data_offset =
108 ((b * extended_output_shape.dims(1) + y) * extended_output_shape.dims(2) + x) *
109 extended_output_shape.dims(3) +
110 c;
111
112 output_data[output_data_offset] = func(input1_data[subscriptToIndex(desc1, b, y, x, c)],
113 input2_data[subscriptToIndex(desc2, b, y, x, c)]);
114 }
115 }
116 }
117 }
118 return Ok;
119}
120
121} // namespace pal
122} // namespace execute
123} // namespace onert_micro
124
125#endif // ONERT_MICRO_EXECUTE_PAL_BINARYOP_COMMON_H
static OMRuntimeShape extendedShape(int new_shape_size, const OMRuntimeShape &shape)
NdArrayDesc< 4 > desc1
const luci_interpreter::RuntimeShape output_shape
NdArrayDesc< 4 > desc2
OMStatus BinaryOp(const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
void NdArrayDescsForElementwiseBroadcast(const core::OMRuntimeShape &input0_shape, const core::OMRuntimeShape &input1_shape, NdArrayDesc< N > *desc0_out, NdArrayDesc< N > *desc1_out)
OMStatus BroadcastBinaryOp4DSlow(const core::OMRuntimeShape &input1_shape, const float *input1_data, const core::OMRuntimeShape &input2_shape, const float *input2_data, const core::OMRuntimeShape &output_shape, float *output_data)
int subscriptToIndex(const NdArrayDesc< 4 > &desc, int i0, int i1, int i2, int i3)