55 const T *input1_data,
const T *input2_data, T *output_data)
57 T activation_min, activation_max;
61 for (
int i = 0; i < flat_size; ++i)
63 std::min(std::max(func(input1_data[i], input2_data[i]), activation_min), activation_max);
104 T activation_min, activation_max;
119 for (
int b = 0; b < extended_output_shape.
dims(0); ++b)
121 for (
int y = 0; y < extended_output_shape.
dims(1); ++y)
123 for (
int x = 0; x < extended_output_shape.
dims(2); ++x)
125 for (
int c = 0; c < extended_output_shape.
dims(3); ++c)
127 const int output_data_offset =
128 ((b * extended_output_shape.
dims(1) + y) * extended_output_shape.
dims(2) + x) *
129 extended_output_shape.
dims(3) +
132 output_data[output_data_offset] =
168 size_t *input1_offset_p,
size_t *input2_offset_p,
169 size_t *output_offset,
size_t *compressed_input1_stride,
170 size_t *compressed_input2_stride,
size_t *compressed_output_shape,
171 const T *input1_data,
const T *input2_data, T *output_data,
176 for (
size_t c = 0; c < compressed_output_shape[dimension]; ++c)
178 size_t input1_offset_c = *input1_offset_p;
179 size_t input2_offset_c = *input2_offset_p;
181 output_offset, compressed_input1_stride,
182 compressed_input2_stride, compressed_output_shape, input1_data,
183 input2_data, output_data, binary_func);
184 *input1_offset_p += compressed_input1_stride[dimension];
185 *input2_offset_p += compressed_input2_stride[dimension];
190 assert(dimension == 0);
191 bool input1_is_broadcast = compressed_input1_stride[dimension] == 0;
192 bool input2_is_broadcast = compressed_input2_stride[dimension] == 0;
193 assert(!(input1_is_broadcast && input2_is_broadcast));
194 const T *input1_data_ptr = input1_data + *input1_offset_p;
195 const T *input2_data_ptr = input2_data + *input2_offset_p;
196 T *output_data_ptr = output_data + *output_offset;
197 if (input1_is_broadcast)
200 BroadcastInput1<T>(compressed_output_shape[dimension], params, input1_data_ptr,
201 input2_data_ptr, output_data_ptr, binary_func);
202 *input2_offset_p += compressed_output_shape[dimension];
204 else if (input2_is_broadcast)
207 BroadcastInput2<T>(compressed_output_shape[dimension], params, input1_data_ptr,
208 input2_data_ptr, output_data_ptr, binary_func);
209 *input1_offset_p += compressed_output_shape[dimension];
214 ElementWise<T>(compressed_output_shape[dimension], params, input1_data_ptr, input2_data_ptr,
215 output_data_ptr, binary_func);
216 *input1_offset_p += compressed_output_shape[dimension];
217 *input2_offset_p += compressed_output_shape[dimension];
219 *output_offset += compressed_output_shape[dimension];
230 constexpr int kMaxBroadcastDim = 6;
243 size_t compressed_input1_stride[kMaxBroadcastDim];
244 size_t compressed_input2_stride[kMaxBroadcastDim];
245 size_t compressed_output_shape[kMaxBroadcastDim];
246 bool broadcastable_shape = ReduceDimensionsForBroadcast<kMaxBroadcastDim>(
247 input1_shape, input2_shape, compressed_input1_stride, compressed_input2_stride,
248 compressed_output_shape);
250 if (!broadcastable_shape)
255 size_t input1_offset = 0;
256 size_t input2_offset = 0;
257 size_t output_offset = 0;
259 &output_offset, compressed_input1_stride, compressed_input2_stride,
260 compressed_output_shape, input1_data, input2_data, output_data,
void BroadcastInput2(int size, const core::ArithmeticQuantParams &params, const T *input1_data, const T *input2_data, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))
void BroadcastInput1(int size, const core::ArithmeticQuantParams &params, const T *input1_data, const T *input2_data, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))
OMStatus BroadcastArithmeticOp4DSlow(const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
void BroadcastBinaryFunction6DSlow(const core::ArithmeticQuantParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))
void ElementWise(const uint32_t size, const core::ArithmeticQuantParams &params, const T *input1_data, const T *input2_data, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))
void BroadcastRecursiveDimensions(const core::ArithmeticQuantParams &params, int dimension, size_t *input1_offset_p, size_t *input2_offset_p, size_t *output_offset, size_t *compressed_input1_stride, size_t *compressed_input2_stride, size_t *compressed_output_shape, const T *input1_data, const T *input2_data, T *output_data, T(*binary_func)(T, T, const core::ArithmeticQuantParams &))