67 int32_t extended_output_shape_dims[kMaxMulBroadcastDim];
68 std::memcpy(extended_output_shape_dims, extended_output_shape.
dimsData(),
69 sizeof(extended_output_shape_dims));
71 size_t input1_offset_a = 0;
72 size_t input2_offset_a = 0;
73 size_t output_offset_a = 0;
74 for (
int a = 0; a < extended_output_shape_dims[0]; ++a)
76 size_t input1_offset_d = input1_offset_a;
77 size_t input2_offset_d = input2_offset_a;
78 size_t output_offset_d = output_offset_a;
79 for (
int d = 0; d < extended_output_shape_dims[1]; ++d)
81 size_t input1_offset_b = input1_offset_d;
82 size_t input2_offset_b = input2_offset_d;
83 size_t output_offset_b = output_offset_d;
84 for (
int b = 0; b < extended_output_shape_dims[2]; ++b)
86 size_t input1_offset_y = input1_offset_b;
87 size_t input2_offset_y = input2_offset_b;
88 size_t output_offset_y = output_offset_b;
89 for (
int y = 0; y < extended_output_shape_dims[3]; ++y)
91 size_t input1_offset_x = input1_offset_y;
92 size_t input2_offset_x = input2_offset_y;
93 size_t output_offset_x = output_offset_y;
94 for (
int x = 0; x < extended_output_shape_dims[4]; ++x)
96 size_t input1_offset_c = input1_offset_x;
97 size_t input2_offset_c = input2_offset_x;
98 size_t output_offset_c = output_offset_x;
99 for (
int c = 0; c < extended_output_shape_dims[5]; ++c)
101 const int32_t input1_val = params.
input1_offset + input1_data[input1_offset_c];
102 const int32_t input2_val = params.
input2_offset + input2_data[input2_offset_c];
103 const int32_t unclamped_result =
107 const int32_t clamped_output =
110 output_data[output_offset_c] =
static_cast<T
>(clamped_output);
111 input1_offset_c +=
desc1.strides[5];
112 input2_offset_c +=
desc2.strides[5];
115 input1_offset_x +=
desc1.strides[4];
116 input2_offset_x +=
desc2.strides[4];
117 output_offset_x += extended_output_shape_dims[5];
119 input1_offset_y +=
desc1.strides[3];
120 input2_offset_y +=
desc2.strides[3];
121 output_offset_y += extended_output_shape_dims[4] * extended_output_shape_dims[5];
123 input1_offset_b +=
desc1.strides[2];
124 input2_offset_b +=
desc2.strides[2];
125 output_offset_b += extended_output_shape_dims[3] * extended_output_shape_dims[4] *
126 extended_output_shape_dims[5];
128 input1_offset_d +=
desc1.strides[1];
129 input2_offset_d +=
desc2.strides[1];
130 output_offset_d += extended_output_shape_dims[2] * extended_output_shape_dims[3] *
131 extended_output_shape_dims[4] * extended_output_shape_dims[5];
133 input1_offset_a +=
desc1.strides[0];
134 input2_offset_a +=
desc2.strides[0];
135 output_offset_a += extended_output_shape_dims[1] * extended_output_shape_dims[2] *
136 extended_output_shape_dims[3] * extended_output_shape_dims[4] *
137 extended_output_shape_dims[5];
OMStatus BroadcastMul6DSlow(const core::ArithmeticQuantParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)
OMStatus Mul(const core::ArithmeticQuantParams &params, const uint32_t flat_size, const int8_t *input1_data, const int8_t *input2_data, int8_t *output_data)
OMStatus BroadcastMul4DSlow(const core::BinaryArithmeticBroadcastParams &params, const core::OMRuntimeShape &input1_shape, const T *input1_data, const core::OMRuntimeShape &input2_shape, const T *input2_data, const core::OMRuntimeShape &output_shape, T *output_data)