ONE - On-device Neural Engine
Loading...
Searching...
No Matches
PALBinaryOpCommon.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef ONERT_MICRO_EXECUTE_PAL_BINARYOP_COMMON_H
19#define ONERT_MICRO_EXECUTE_PAL_BINARYOP_COMMON_H
20
#include "OMStatus.h"
#include "core/OMRuntimeShape.h"
#include "PALUtils.h"
// NOTE(review): original line 24 was lost in extraction; ProcessBroadcastShapes.h
// is the PAL header declaring NdArrayDesc / NdArrayDescsForElementwiseBroadcast /
// subscriptToIndex used below — confirm against the upstream file.
#include "ProcessBroadcastShapes.h"

#include <cmath>
26
27namespace onert_micro
28{
29namespace execute
30{
31namespace pal
32{
33
// Functor for the FloorDiv op: floor(lhs / rhs), i.e. the quotient rounded
// toward negative infinity (unlike C++ truncating division).
// Enabled only for floating-point element types.
// NOTE(review): the struct-name line was dropped by the doc extractor;
// restored as FloorDivFn to match the Fn-suffix convention used by
// MaximumFn/MinimumFn in this file — confirm against the upstream header.
template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
struct FloorDivFn
{
  T operator()(T lhs, T rhs)
  {
    // Divide in double precision before flooring to reduce rounding error
    // for float inputs.
    return std::floor(static_cast<double>(lhs) / static_cast<double>(rhs));
  }
};
// Functor for the FloorMod op: remainder whose sign follows the divisor
// (Python-style modulo), built from the truncating std::fmod.
// Enabled only for floating-point element types.
// NOTE(review): the struct-name line was dropped by the doc extractor;
// restored as FloorModFn by analogy with FloorDiv — confirm upstream.
template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
struct FloorModFn
{
  T operator()(T lhs, T rhs)
  {
    // std::fmod truncates toward zero; when the result is nonzero and its
    // sign differs from rhs, shift by rhs to get the floor-style remainder.
    T trunc_mod = std::fmod(lhs, rhs);
    return (trunc_mod != 0) && ((rhs < 0) != (trunc_mod < 0)) ? (trunc_mod + rhs) : trunc_mod;
  }
};
// Functor for the element-wise Maximum op: yields the larger operand.
template <typename T> struct MaximumFn
{
  // Same contract as std::max: rhs is chosen only when lhs < rhs,
  // so equal operands yield lhs.
  T operator()(T lhs, T rhs) { return lhs < rhs ? rhs : lhs; }
};
// Functor for the element-wise Minimum op: yields the smaller operand.
template <typename T> struct MinimumFn
{
  // Same contract as std::min: rhs is chosen only when rhs < lhs,
  // so equal operands yield lhs.
  T operator()(T lhs, T rhs) { return rhs < lhs ? rhs : lhs; }
};
59
60// TODO: check if there real activation value
61template <typename T, typename Fn>
62inline OMStatus BinaryOp(const int flat_size, const T *input1_data, const T *input2_data,
63 T *output_data)
64{
65 Fn func;
66 for (int i = 0; i < flat_size; ++i)
67 {
68 output_data[i] = func(input1_data[i], input2_data[i]);
69 }
70 return Ok;
71}
72
73template <typename T, typename Fn>
74inline OMStatus
75BroadcastBinaryOp4DSlow(const core::OMRuntimeShape &input1_shape, const float *input1_data,
76 const core::OMRuntimeShape &input2_shape, const float *input2_data,
77 const core::OMRuntimeShape &output_shape, float *output_data)
78{
81 NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, &desc2);
82
83 const core::OMRuntimeShape extended_output_shape =
85
86 // In Tensorflow, the dimensions are canonically named (batch_number, row,
87 // col, channel), with extents (batches, height, width, depth), with the
88 // trailing dimension changing most rapidly (channels has the smallest stride,
89 // typically 1 element).
90 //
91 // In generated C code, we store arrays with the dimensions reversed. The
92 // first dimension has smallest stride.
93 //
94 // We name our variables by their Tensorflow convention, but generate C code
95 // nesting loops such that the innermost loop has the smallest stride for the
96 // best cache behavior.
97
98 Fn func;
99 for (int b = 0; b < extended_output_shape.dims(0); ++b)
100 {
101 for (int y = 0; y < extended_output_shape.dims(1); ++y)
102 {
103 for (int x = 0; x < extended_output_shape.dims(2); ++x)
104 {
105 for (int c = 0; c < extended_output_shape.dims(3); ++c)
106 {
107 const int output_data_offset =
108 ((b * extended_output_shape.dims(1) + y) * extended_output_shape.dims(2) + x) *
109 extended_output_shape.dims(3) +
110 c;
111
112 output_data[output_data_offset] = func(input1_data[subscriptToIndex(desc1, b, y, x, c)],
113 input2_data[subscriptToIndex(desc2, b, y, x, c)]);
114 }
115 }
116 }
117 }
118 return Ok;
119}
120
121} // namespace pal
122} // namespace execute
123} // namespace onert_micro
124
125#endif // ONERT_MICRO_EXECUTE_PAL_BINARYOP_COMMON_H
static OMRuntimeShape extendedShape(int new_shape_size, const OMRuntimeShape &shape)
NdArrayDesc< 4 > desc1
const luci_interpreter::RuntimeShape output_shape
NdArrayDesc< 4 > desc2
OMStatus BinaryOp(const int flat_size, const T *input1_data, const T *input2_data, T *output_data)
void NdArrayDescsForElementwiseBroadcast(const core::OMRuntimeShape &input0_shape, const core::OMRuntimeShape &input1_shape, NdArrayDesc< N > *desc0_out, NdArrayDesc< N > *desc1_out)
OMStatus BroadcastBinaryOp4DSlow(const core::OMRuntimeShape &input1_shape, const float *input1_data, const core::OMRuntimeShape &input2_shape, const float *input2_data, const core::OMRuntimeShape &output_shape, float *output_data)
int subscriptToIndex(const NdArrayDesc< 4 > &desc, int i0, int i1, int i2, int i3)