ONE - On-device Neural Engine
Loading...
Searching...
No Matches
BroadcastTo.h
Go to the documentation of this file.
/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
18#ifndef __NNFW_CKER_BROADCAST_TO_H__
19#define __NNFW_CKER_BROADCAST_TO_H__
20
21#include "cker/Types.h"
22#include "cker/Shape.h"
23#include "cker/Utils.h"
24
26
29
30#include <vector>
31
32#define UNUSED(x) (void)(x)
33
34namespace nnfw
35{
36namespace cker
37{
38namespace functor
39{
40static const int32_t kint32max = ((int32_t)0x7FFFFFFF);
41
42template <typename Device, typename T> struct FillFunctor
43{
44 // Computes on device "d": out = out.constant(in(0)),
45 void operator()(const Device &d, typename TTypes<T>::Flat out,
46 typename TTypes<T>::ConstScalar in);
47};
48
49template <typename T> struct FillFunctor<Eigen::ThreadPoolDevice, T>
50{
51 void operator()(const Eigen::ThreadPoolDevice &d, typename TTypes<T>::Flat out,
52 typename TTypes<T>::ConstScalar in)
53 {
54 out.device(d) = out.constant(in());
55 }
56};
57
58template <typename Device, typename T> struct BroadcastTo
59{
60 template <int NDIMS>
61 void DoBCast32Bit(const Device &device, typename TTypes<T, NDIMS>::Tensor out,
63 const typename Eigen::array<int, NDIMS> &bcast) const
64 {
65 To32Bit(out).device(device) = To32Bit(in).broadcast(bcast);
66 }
67
68 template <int NDIMS>
69 void DoBCast(const Device &device, typename TTypes<T, NDIMS>::Tensor out,
71 const typename Eigen::array<Eigen::DenseIndex, NDIMS> &bcast) const
72 {
73 out.device(device) = in.broadcast(bcast);
74 }
75
76 template <int NDIMS>
77 void ReshapeAndBCast(const Device &device, Tensor &output_tensor, const Tensor &input_tensor,
78 const BCast &bcast) const
79 {
80 const bool can_use_32bit = std::is_same<Eigen::GpuDevice, Device>::value &&
81 output_tensor.shape.FlatSize() < kint32max &&
82 input_tensor.shape.FlatSize() < kint32max;
83 if (can_use_32bit)
84 {
85 DoBCast32Bit<NDIMS>(device, output_tensor.template shaped<T, NDIMS>(bcast.result_shape()),
86 input_tensor.template shaped<T, NDIMS>(bcast.x_reshape()),
87 BCast::ToIndexArrayType<int, NDIMS>(bcast.x_bcast()));
88 }
89 else
90 {
91 DoBCast<NDIMS>(device, output_tensor.template shaped<T, NDIMS>(bcast.result_shape()),
92 input_tensor.template shaped<T, NDIMS>(bcast.x_reshape()),
93 BCast::ToIndexArrayType<Eigen::DenseIndex, NDIMS>(bcast.x_bcast()));
94 }
95 }
96
97 // PRECONDITION: rank(input_shape) > 0 &&
98 // rank(input_shape) <= rank(output_shape) &&
99 // output_shape.num_elements() > 0.
100 void operator()(const Device &device, Tensor &output_tensor, const Shape &output_shape,
101 const Tensor &input_tensor, const Shape &input_shape, const BCast &bcast) const
102 {
103 const int ndims = bcast.y_reshape().size();
104 switch (ndims)
105 {
106 case 1:
107 ReshapeAndBCast<1>(device, output_tensor, input_tensor, bcast);
108 break;
109 case 2:
110 ReshapeAndBCast<2>(device, output_tensor, input_tensor, bcast);
111 break;
112 case 3:
113 ReshapeAndBCast<3>(device, output_tensor, input_tensor, bcast);
114 break;
115 case 4:
116 ReshapeAndBCast<4>(device, output_tensor, input_tensor, bcast);
117 break;
118 case 5:
119 ReshapeAndBCast<5>(device, output_tensor, input_tensor, bcast);
120 break;
121 default:
122 // NOTE : UNUSED leaves for maintenance purposes.
124 UNUSED(input_shape);
125 break;
126 }
127 }
128};
129} // namespace functor
130
131template <typename T>
132inline void BroadcastTo(const Shape &input_shape, T *input_data, const Shape &output_shape,
133 T *output_data)
134{
135 const int input_flatsize = input_shape.FlatSize();
136
137 if (input_shape == output_shape)
138 {
139 memcpy(output_data, input_data, input_flatsize * sizeof(T));
140 return;
141 }
142
143 // Input shape's rank must be no greater than rank of output shape.
144 assert(input_shape.DimensionsCount() <= output_shape.DimensionsCount());
145
146 // It shouldn't be 0.
147 assert(output_shape.DimensionsCount());
148
149 Tensor output_tensor;
150 Tensor input_tensor;
151
152 input_tensor.shape.ReplaceWith(input_shape.DimensionsCount(), input_shape.DimsData());
153 input_tensor.buffer = input_data;
154
155 output_tensor.shape.ReplaceWith(output_shape.DimensionsCount(), output_shape.DimsData());
156 output_tensor.buffer = output_data;
157
158 const Eigen::ThreadPoolDevice &device = *eigen_support::GetThreadPoolDevice();
159
160 // Handle broadcast from Scalar.
161 if (input_flatsize == 0)
162 {
163 functor::FillFunctor<Eigen::ThreadPoolDevice, T>()(device, output_tensor.flat<T>(),
164 input_tensor.scalar<T>());
165 }
166
168 /*fewer_dims_optimization=*/true);
169
170 // Predict TRUE.
171 assert(bcast.IsValid());
172 // should be same.
173 assert(BCast::ToShape(bcast.output_shape()) == output_shape);
174
176 input_tensor, input_shape, bcast);
177}
178
179} // namespace cker
180} // namespace nnfw
181
182#endif // __NNFW_CKER_BROADCAST_TO_H__
static Vec FromShape(const Shape &shape)
Definition BCast.h:444
static Shape ToShape(const BCastList::Vec &vec)
Definition BCast.h:455
const Vec & result_shape() const
Definition BCast.h:409
const Vec & x_reshape() const
Definition BCast.h:405
const Vec & y_reshape() const
Definition BCast.h:407
const Vec & output_shape() const
Definition BCast.h:410
const Vec & x_bcast() const
Definition BCast.h:406
bool IsValid() const
Definition BCast.h:96
int32_t DimensionsCount() const
Definition Shape.h:91
void ReplaceWith(int dimensions_count, const int32_t *dims_data)
Definition Shape.h:130
int FlatSize() const
Definition Shape.h:181
int32_t * DimsData()
Definition Shape.h:112
#define UNUSED(x)
const luci_interpreter::RuntimeShape output_shape
const Eigen::ThreadPoolDevice * GetThreadPoolDevice()
void BroadcastTo(const Shape &input_shape, T *input_data, const Shape &output_shape, T *output_data)
TTypes< typenameTensorType::Scalar, TensorType::NumIndices >::Tensor32Bit To32Bit(TensorType in)
Definition Tensor.h:178
Definition topk_v2.h:30
Eigen::TensorMap< Eigen::TensorFixedSize< const T, Eigen::Sizes<>, Eigen::RowMajor, IndexType >, Eigen::Aligned > ConstScalar
Definition Tensor.h:51
Eigen::TensorMap< Eigen::Tensor< const T, NDIMS, Eigen::RowMajor, IndexType >, Eigen::Aligned > ConstTensor
Definition Tensor.h:35
Eigen::TensorMap< Eigen::Tensor< T, 1, Eigen::RowMajor, IndexType >, Eigen::Aligned > Flat
Definition Tensor.h:61
Eigen::TensorMap< Eigen::Tensor< T, NDIMS, Eigen::RowMajor, IndexType >, Eigen::Aligned > Tensor
Definition Tensor.h:32
TTypes< T >::ConstScalar scalar() const
Definition Tensor.h:156
void DoBCast32Bit(const Device &device, typename TTypes< T, NDIMS >::Tensor out, typename TTypes< T, NDIMS >::ConstTensor in, const typename Eigen::array< int, NDIMS > &bcast) const
Definition BroadcastTo.h:61
void operator()(const Device &device, Tensor &output_tensor, const Shape &output_shape, const Tensor &input_tensor, const Shape &input_shape, const BCast &bcast) const
void ReshapeAndBCast(const Device &device, Tensor &output_tensor, const Tensor &input_tensor, const BCast &bcast) const
Definition BroadcastTo.h:77
void DoBCast(const Device &device, typename TTypes< T, NDIMS >::Tensor out, typename TTypes< T, NDIMS >::ConstTensor in, const typename Eigen::array< Eigen::DenseIndex, NDIMS > &bcast) const
Definition BroadcastTo.h:69
void operator()(const Eigen::ThreadPoolDevice &d, typename TTypes< T >::Flat out, typename TTypes< T >::ConstScalar in)
Definition BroadcastTo.h:51
void operator()(const Device &d, typename TTypes< T >::Flat out, typename TTypes< T >::ConstScalar in)