ONE - On-device Neural Engine
tflite::reference_ops Namespace Reference

Data Structures

struct  GatherNdHelperResult
 

Functions

template<typename T >
void Relu (const RuntimeShape &input_shape, const T *input_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename T >
void Relu1 (const RuntimeShape &input_shape, const T *input_data, const RuntimeShape &output_shape, T *output_data)
 
void Relu6 (const RuntimeShape &input_shape, const float *input_data, const RuntimeShape &output_shape, float *output_data)
 
template<typename T >
void ReluX (const tflite::ReluParams &params, const RuntimeShape &input_shape, const T *input_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename T >
void ReluX (const tflite::ActivationParams &params, const RuntimeShape &input_shape, const T *input_data, const RuntimeShape &output_shape, T *output_data)
 
void BroadcastMulFivefold (const ArithmeticParams &unswitched_params, const RuntimeShape &unswitched_input1_shape, const uint8 *unswitched_input1_data, const RuntimeShape &unswitched_input2_shape, const uint8 *unswitched_input2_data, const RuntimeShape &output_shape, uint8 *output_data)
 
void Mul (const ArithmeticParams &params, const RuntimeShape &input1_shape, const int16 *input1_data, const RuntimeShape &input2_shape, const int16 *input2_data, const RuntimeShape &output_shape, int16 *output_data)
 
void Mul (const ArithmeticParams &params, const RuntimeShape &input1_shape, const int16 *input1_data, const RuntimeShape &input2_shape, const int16 *input2_data, const RuntimeShape &output_shape, uint8 *output_data)
 
void Sub16 (const ArithmeticParams &params, const RuntimeShape &input1_shape, const int16_t *input1_data, const RuntimeShape &input2_shape, const int16_t *input2_data, const RuntimeShape &output_shape, int16_t *output_data)
 
template<typename Scalar >
void Pack (const PackParams &params, const RuntimeShape *const *input_shapes, const Scalar *const *input_data, const RuntimeShape &output_shape, Scalar *output_data)
 
template<typename Scalar >
void Unpack (const UnpackParams &params, const RuntimeShape &input_shape, const Scalar *input_data, const RuntimeShape &output_shape, Scalar *const *output_datas)
 
template<typename Scalar >
void PackWithScaling (const PackParams &params, const RuntimeShape *const *input_shapes, const uint8 *const *input_data, const RuntimeShape &output_shape, uint8 *output_data)
 
template<typename Scalar >
void DepthConcatenation (const ConcatenationParams &params, const RuntimeShape *const *input_shapes, const Scalar *const *input_data, const RuntimeShape &output_shape, Scalar *output_data)
 
void LstmCell (const LstmCellParams &params, const RuntimeShape &unextended_input_shape, const float *input_data, const RuntimeShape &unextended_prev_activ_shape, const float *prev_activ_data, const RuntimeShape &weights_shape, const float *weights_data, const RuntimeShape &unextended_bias_shape, const float *bias_data, const RuntimeShape &unextended_prev_state_shape, const float *prev_state_data, const RuntimeShape &unextended_output_state_shape, float *output_state_data, const RuntimeShape &unextended_output_activ_shape, float *output_activ_data, const RuntimeShape &unextended_concat_temp_shape, float *concat_temp_data, const RuntimeShape &unextended_activ_temp_shape, float *activ_temp_data)
 
template<int StateIntegerBits>
void LstmCell (const LstmCellParams &params, const RuntimeShape &unextended_input_shape, const uint8 *input_data_uint8, const RuntimeShape &unextended_prev_activ_shape, const uint8 *prev_activ_data_uint8, const RuntimeShape &weights_shape, const uint8 *weights_data_uint8, const RuntimeShape &unextended_bias_shape, const int32 *bias_data_int32, const RuntimeShape &unextended_prev_state_shape, const int16 *prev_state_data_int16, const RuntimeShape &unextended_output_state_shape, int16 *output_state_data_int16, const RuntimeShape &unextended_output_activ_shape, uint8 *output_activ_data_uint8, const RuntimeShape &unextended_concat_temp_shape, uint8 *concat_temp_data_uint8, const RuntimeShape &unextended_activ_temp_shape, int16 *activ_temp_data_int16, void *gemmlowp_context)
 
template<typename Scalar >
void Split (const SplitParams &params, const RuntimeShape &input_shape, const Scalar *input_data, const RuntimeShape *const *output_shapes, Scalar *const *output_data)
 
int NodeOffset (int b, int h, int w, int height, int width)
 
void LocalResponseNormalization (const tflite::LocalResponseNormalizationParams &op_params, const RuntimeShape &input_shape, const float *input_data, const RuntimeShape &output_shape, float *output_data)
 
void Dequantize (const RuntimeShape &input_shape, const Eigen::half *input_data, const RuntimeShape &output_shape, float *output_data)
 
void FakeQuant (const tflite::FakeQuantParams &op_params, const RuntimeShape &input_shape, const float *input_data, const RuntimeShape &output_shape, float *output_data)
 
GatherNdHelperResult GatherNdHelper (const RuntimeShape &params_shape, const RuntimeShape &indices_shape)
 
template<typename ParamsT , typename IndicesT = int32>
void GatherNd (const RuntimeShape &params_shape, const ParamsT *params_data, const RuntimeShape &indices_shape, const IndicesT *indices_data, const RuntimeShape &output_shape, ParamsT *output_data)
 
template<typename IndicesT = int32>
void GatherNdString (const RuntimeShape &params_shape, const TfLiteTensor *params_data, const RuntimeShape &indices_shape, const IndicesT *indices_data, const RuntimeShape &output_shape, TfLiteTensor *output_data)
 
template<typename IndicesT , typename UpdatesT >
void ScatterNd (const RuntimeShape &indices_shape, const IndicesT *indices_data, const RuntimeShape &updates_shape, const UpdatesT *updates_data, const RuntimeShape &output_shape, UpdatesT *output_data)
 
template<typename T >
void Slice (const tflite::SliceParams &op_params, const RuntimeShape &input_shape, const RuntimeShape &output_shape, SequentialTensorWriter< T > *writer)
 
template<typename T >
void Slice (const tflite::SliceParams &op_params, const RuntimeShape &input_shape, const T *input_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename T >
void Slice (const tflite::SliceParams &op_params, const RuntimeShape &input_shape, const TfLiteTensor *input, const RuntimeShape &output_shape, TfLiteTensor *output)
 
template<typename T >
void Minimum (const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename T >
void Minimum (const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &, const T *input2_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename T >
void Maximum (const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename T >
void Maximum (const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &, const T *input2_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename T1 , typename T2 , typename T3 >
void ArgMax (const RuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data, const RuntimeShape &output_shape, T2 *output_data)
 
template<typename T1 , typename T2 , typename T3 >
void ArgMax (const RuntimeShape &input1_shape, const T1 *input1_data, const RuntimeShape &input2_shape, const T3 *input2_data, const RuntimeShape &output_shape, T2 *output_data)
 
template<typename D , typename T >
void Select (const RuntimeShape &input_condition_shape, const D *input_condition_data, const RuntimeShape &input_x_shape, const T *input_x_data, const RuntimeShape &input_y_shape, const T *input_y_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename D , typename T >
void RankOneSelect (const RuntimeShape &input_condition_shape, const D *input_condition_data, const RuntimeShape &input_x_shape, const T *input_x_data, const RuntimeShape &input_y_shape, const T *input_y_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename D , typename T >
void BroadcastSelect4DSlow (const RuntimeShape &input_condition_shape, const D *input_condition_data, const RuntimeShape &input_x_shape, const T *input_x_data, const RuntimeShape &input_y_shape, const T *input_y_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename D , typename T >
void SelectTrueCoords (const RuntimeShape &input_condition_shape, const D *input_condition_data, T *output_data)
 
template<typename T , typename TI >
void SparseToDense (const std::vector< std::vector< TI > > &indices, const T *values, T default_value, bool value_is_scalar, const RuntimeShape &unextended_output_shape, T *output_data)
 
template<typename T >
void Pow (const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &input2_shape, const T *input2_data, const RuntimeShape &output_shape, T *output_data)
 
template<typename T >
void BroadcastPow4DSlow (const RuntimeShape &unextended_input1_shape, const T *input1_data, const RuntimeShape &unextended_input2_shape, const T *input2_data, const RuntimeShape &unextended_output_shape, T *output_data)
 
template<typename Scalar >
void Reverse (int axis, const RuntimeShape &input_shape, const Scalar *input_data, const RuntimeShape &output_shape, Scalar *output_data)
 
template<typename Scalar , typename TS >
void ReverseSequence (const TS *seq_lengths, const int seq_dim, const int batch_dim, const RuntimeShape &input_shape, const Scalar *input_data, const RuntimeShape &output_shape, Scalar *output_data)
 
template<typename T >
void SegmentSum (const RuntimeShape &input_shape, const T *input_data, const RuntimeShape &segment_ids_shape, const int32_t *segment_ids_data, const RuntimeShape &output_shape, T *output_data)
 

Function Documentation

◆ ArgMax() [1/2]

template<typename T1 , typename T2 , typename T3 >
void tflite::reference_ops::ArgMax ( const RuntimeShape &  input1_shape,
const T1 *  input1_data,
const RuntimeShape &  input2_shape,
const T3 *  input2_data,
const RuntimeShape &  output_shape,
T2 *  output_data 
)
inline

Definition at line 1188 of file PALreference_ops.h.

1191{
1192 // Drop shape of second input: not needed.
1193 ArgMax(input1_shape, input1_data, input2_data, output_shape, output_data);
1194}

References ArgMax(), and output_shape.

◆ ArgMax() [2/2]

template<typename T1 , typename T2 , typename T3 >
void tflite::reference_ops::ArgMax ( const RuntimeShape &  input1_shape,
const T1 *  input1_data,
const T3 *  input2_data,
const RuntimeShape &  output_shape,
T2 *  output_data 
)

Definition at line 1179 of file PALreference_ops.h.

1181{
1182 ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data, std::greater<T1>());
1183}

References output_shape.

Referenced by ArgMax().
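
For intuition, here is a minimal standalone sketch of the reduction this performs over the innermost axis. It is illustrative only: it uses plain arrays instead of RuntimeShape, and NaiveArgMaxLastAxis is a hypothetical name, not part of this API.

#include <functional>

// Illustrative sketch: argmax over the innermost axis of a flattened
// [outer_size x axis_size] buffer, mirroring ArgMinMax with std::greater.
template <typename T, typename Out>
void NaiveArgMaxLastAxis(const T *input, int outer_size, int axis_size, Out *output)
{
  for (int i = 0; i < outer_size; ++i)
  {
    int best = 0;
    for (int j = 1; j < axis_size; ++j)
    {
      if (std::greater<T>()(input[i * axis_size + j], input[i * axis_size + best]))
        best = j;
    }
    output[i] = static_cast<Out>(best);
  }
}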

◆ BroadcastMulFivefold()

void tflite::reference_ops::BroadcastMulFivefold ( const ArithmeticParams &  unswitched_params,
const RuntimeShape &  unswitched_input1_shape,
const uint8 *  unswitched_input1_data,
const RuntimeShape &  unswitched_input2_shape,
const uint8 *  unswitched_input2_data,
const RuntimeShape &  output_shape,
uint8 *  output_data 
)
inline

Definition at line 191 of file PALreference_ops.h.

197{
198 ArithmeticParams switched_params = unswitched_params;
199 switched_params.input1_offset = unswitched_params.input2_offset;
200 switched_params.input2_offset = unswitched_params.input1_offset;
201
202 const bool use_unswitched = unswitched_params.broadcast_category ==
203 tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;
204
205 const ArithmeticParams &params = use_unswitched ? unswitched_params : switched_params;
206 const uint8 *input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data;
207 const uint8 *input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data;
208
209 // Fivefold nested loops. The second input resets its position for each
210 // iteration of the second loop. The first input resets its position at the
211 // beginning of the fourth loop. The innermost loop is an elementwise Mul of
212 // sections of the arrays.
213 uint8 *output_data_ptr = output_data;
214 const uint8 *input1_data_ptr = input1_data;
215 const uint8 *input2_data_reset = input2_data;
216 int y0 = params.broadcast_shape[0];
217 int y1 = params.broadcast_shape[1];
218 int y2 = params.broadcast_shape[2];
219 int y3 = params.broadcast_shape[3];
220 int y4 = params.broadcast_shape[4];
221 for (int i0 = 0; i0 < y0; ++i0)
222 {
223 const uint8 *input2_data_ptr;
224 for (int i1 = 0; i1 < y1; ++i1)
225 {
226 input2_data_ptr = input2_data_reset;
227 for (int i2 = 0; i2 < y2; ++i2)
228 {
229 for (int i3 = 0; i3 < y3; ++i3)
230 {
231 MulElementwise(y4, params, input1_data_ptr, input2_data_ptr, output_data_ptr);
232 input2_data_ptr += y4;
233 output_data_ptr += y4;
234 }
235 input1_data_ptr += y4;
236 }
237 }
238 input2_data_reset = input2_data_ptr;
239 }
240}

◆ BroadcastPow4DSlow()

template<typename T >
void tflite::reference_ops::BroadcastPow4DSlow ( const RuntimeShape &  unextended_input1_shape,
const T *  input1_data,
const RuntimeShape &  unextended_input2_shape,
const T *  input2_data,
const RuntimeShape &  unextended_output_shape,
T *  output_data 
)
inline

Definition at line 1388 of file PALreference_ops.h.

1391{
1392 TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
1393 TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
1394 TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
1395 const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
1396
1397 NdArrayDesc<4> desc1;
1398 NdArrayDesc<4> desc2;
1399 NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1,
1400 &desc2);
1401
1402 for (int b = 0; b < output_shape.Dims(0); ++b)
1403 {
1404 for (int y = 0; y < output_shape.Dims(1); ++y)
1405 {
1406 for (int x = 0; x < output_shape.Dims(2); ++x)
1407 {
1408 for (int c = 0; c < output_shape.Dims(3); ++c)
1409 {
1410 auto out_idx = Offset(output_shape, b, y, x, c);
1411 auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
1412 auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
1413 auto in1_val = input1_data[in1_idx];
1414 auto in2_val = input2_data[in2_idx];
1415 output_data[out_idx] = std::pow(in1_val, in2_val);
1416 }
1417 }
1418 }
1419 }
1420}

References desc1, desc2, NdArrayDescsForElementwiseBroadcast(), Offset(), output_shape, and SubscriptToIndex().
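
Worked example (illustrative values): with input1 of extended shape [1, 1, 1, 3] holding {2, 3, 4} and input2 of shape [1, 1, 1, 1] holding {2}, the broadcast descriptor for input2 gets stride 0 on every dimension, so every output element reads input2_data[0]; the result is {2^2, 3^2, 4^2} = {4, 9, 16}.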

◆ BroadcastSelect4DSlow()

template<typename D , typename T >
void tflite::reference_ops::BroadcastSelect4DSlow ( const RuntimeShape &  input_condition_shape,
const D *  input_condition_data,
const RuntimeShape &  input_x_shape,
const T *  input_x_data,
const RuntimeShape &  input_y_shape,
const T *  input_y_data,
const RuntimeShape &  output_shape,
T *  output_data 
)

Definition at line 1248 of file PALreference_ops.h.

1252{
1253 TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 4);
1254 TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 4);
1255 TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 4);
1256 TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4);
1257
1258 const RuntimeShape extended_output_shape = RuntimeShape::ExtendedShape(4, output_shape);
1259
1260 NdArrayDesc<4> desc_condition;
1261 NdArrayDesc<4> desc_x;
1262 NdArrayDesc<4> desc_y;
1263 NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape, input_y_shape,
1264 &desc_condition, &desc_x, &desc_y);
1265
1266 // In Tensorflow, the dimensions are canonically named (batch_number, row,
1267 // col, channel), with extents (batches, height, width, depth), with the
1268 // trailing dimension changing most rapidly (channels has the smallest
1269 // stride, typically 1 element).
1270 //
1271 // In generated C code, we store arrays with the dimensions reversed. The
1272 // first dimension has smallest stride.
1273 //
1274 // We name our variables by their Tensorflow convention, but generate C code
1275 // nesting loops such that the innermost loop has the smallest stride for
1276 // the best cache behavior.
1277 for (int b = 0; b < extended_output_shape.Dims(0); ++b)
1278 {
1279 for (int y = 0; y < extended_output_shape.Dims(1); ++y)
1280 {
1281 for (int x = 0; x < extended_output_shape.Dims(2); ++x)
1282 {
1283 for (int c = 0; c < extended_output_shape.Dims(3); ++c)
1284 {
1285 const int condition_index = SubscriptToIndex(desc_condition, b, y, x, c);
1286 const int x_index = SubscriptToIndex(desc_x, b, y, x, c);
1287 const int y_index = SubscriptToIndex(desc_y, b, y, x, c);
1288 output_data[Offset(extended_output_shape, b, y, x, c)] =
1289 input_condition_data[condition_index] ? input_x_data[x_index] : input_y_data[y_index];
1290 }
1291 }
1292 }
1293 }
1294}

References NdArrayDescsForElementwiseBroadcast(), Offset(), output_shape, and SubscriptToIndex().

◆ DepthConcatenation()

template<typename Scalar >
void tflite::reference_ops::DepthConcatenation ( const ConcatenationParams &  params,
const RuntimeShape *const *  input_shapes,
const Scalar *const *  input_data,
const RuntimeShape &  output_shape,
Scalar *  output_data 
)

Definition at line 462 of file PALreference_ops.h.

465{
466 ruy::profiler::ScopeLabel label("DepthConcatenation");
467 auto params_copy = params;
468 params_copy.axis = 3;
469 Concatenation(params_copy, input_shapes, input_data, output_shape, output_data);
470}

References Concatenation(), and output_shape.

◆ Dequantize()

void tflite::reference_ops::Dequantize ( const RuntimeShape &  input_shape,
const Eigen::half *  input_data,
const RuntimeShape &  output_shape,
float *  output_data 
)
inline

Definition at line 913 of file PALreference_ops.h.

915{
916 const int flat_size = MatchingFlatSize(input_shape, output_shape);
917 for (int i = 0; i < flat_size; i++)
918 {
919 output_data[i] = static_cast<float>(input_data[i]);
920 }
921}

References MatchingFlatSize(), and output_shape.

◆ FakeQuant()

void tflite::reference_ops::FakeQuant ( const tflite::FakeQuantParams &  op_params,
const RuntimeShape &  input_shape,
const float *  input_data,
const RuntimeShape &  output_shape,
float *  output_data 
)
inline

Definition at line 923 of file PALreference_ops.h.

925{
926 ruy::profiler::ScopeLabel label("FakeQuant");
927 float rmin = op_params.minmax.min;
928 float rmax = op_params.minmax.max;
929 int num_bits = op_params.num_bits;
930 // 0 should always be a representable value. Let's assume that the initial
931 // min,max range contains 0.
932 TFLITE_DCHECK_LE(rmin, 0.0f);
933 TFLITE_DCHECK_GE(rmax, 0.0f);
934 TFLITE_DCHECK_LT(rmin, rmax);
935
936 // Code matches tensorflow's FakeQuantWithMinMaxArgsFunctor.
937 int quant_min = 0;
938 int quant_max = (1 << num_bits) - 1;
939 float nudged_min, nudged_max, nudged_scale;
940 NudgeQuantizationRange(rmin, rmax, quant_min, quant_max, &nudged_min, &nudged_max, &nudged_scale);
941 const int flat_size = MatchingFlatSize(input_shape, output_shape);
942 FakeQuantizeArray(nudged_scale, nudged_min, nudged_max, input_data, output_data, flat_size);
943}

References MatchingFlatSize(), and output_shape.
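
In other words, the quantized grid runs from quant_min = 0 to quant_max = 2^num_bits - 1 (255 for num_bits = 8), and NudgeQuantizationRange() adjusts [rmin, rmax] to the nearest range in which zero is exactly representable before FakeQuantizeArray() rounds each input onto that grid.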

◆ GatherNd()

template<typename ParamsT , typename IndicesT = int32>
void tflite::reference_ops::GatherNd ( const RuntimeShape &  params_shape,
const ParamsT *  params_data,
const RuntimeShape &  indices_shape,
const IndicesT *  indices_data,
const RuntimeShape &  output_shape,
ParamsT *  output_data 
)
inline

Definition at line 985 of file PALreference_ops.h.

988{
989 ruy::profiler::ScopeLabel label("GatherNd");
990
991 const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
992 for (int i = 0; i < res.n_slices; ++i)
993 {
994 int from_pos = 0;
995 for (int j = 0; j < res.indices_nd; ++j)
996 {
997 from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j];
998 }
999 std::memcpy(output_data + i * res.slice_size, params_data + from_pos,
1000 sizeof(ParamsT) * res.slice_size);
1001 }
1002}

References tflite::reference_ops::GatherNdHelperResult::dims_to_count, GatherNdHelper(), tflite::reference_ops::GatherNdHelperResult::indices_nd, tflite::reference_ops::GatherNdHelperResult::n_slices, and tflite::reference_ops::GatherNdHelperResult::slice_size.
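
Example (illustrative values): with params of shape [3, 3] and indices of shape [2, 2] holding {0, 0, 2, 2}, the helper yields n_slices = 2, indices_nd = 2, slice_size = 1, and dims_to_count = {3, 1}, so the two copied slices are params[0][0] and params[2][2].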

◆ GatherNdHelper()

GatherNdHelperResult tflite::reference_ops::GatherNdHelper ( const RuntimeShape &  params_shape,
const RuntimeShape &  indices_shape 
)
inline

Definition at line 955 of file PALreference_ops.h.

957{
958 GatherNdHelperResult ret;
959 ret.n_slices = 1;
960 ret.slice_size = 1;
961 const int indices_dims = indices_shape.DimensionsCount();
962 ret.indices_nd = indices_shape.Dims(indices_dims - 1);
963 const int params_dims = params_shape.DimensionsCount();
964 for (int i = 0; i < indices_dims - 1; ++i)
965 {
966 ret.n_slices *= indices_shape.Dims(i);
967 }
968 for (int i = ret.indices_nd; i < params_dims; ++i)
969 {
970 ret.slice_size *= params_shape.Dims(i);
971 }
972
973 int remain_flat_size = params_shape.FlatSize();
974 ret.dims_to_count = std::vector<int>(ret.indices_nd, 0);
975 for (int i = 0; i < ret.indices_nd; ++i)
976 {
977 ret.dims_to_count[i] = remain_flat_size / params_shape.Dims(i);
978 remain_flat_size = ret.dims_to_count[i];
979 }
980
981 return ret;
982}

References tflite::reference_ops::GatherNdHelperResult::dims_to_count, tflite::reference_ops::GatherNdHelperResult::indices_nd, tflite::reference_ops::GatherNdHelperResult::n_slices, and tflite::reference_ops::GatherNdHelperResult::slice_size.

Referenced by GatherNd(), and GatherNdString().
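
Worked example (illustrative values): for params_shape = [3, 4, 5] and indices_shape = [2, 2], this computes indices_nd = 2, n_slices = 2, slice_size = 5 (the trailing params dimension not addressed by the indices), and dims_to_count = {20, 5} (i.e. 60/3, then 20/4), so an index pair (i, j) maps to flat offset 20*i + 5*j.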

◆ GatherNdString()

template<typename IndicesT = int32>
void tflite::reference_ops::GatherNdString ( const RuntimeShape &  params_shape,
const TfLiteTensor *  params_data,
const RuntimeShape &  indices_shape,
const IndicesT *  indices_data,
const RuntimeShape &  output_shape,
TfLiteTensor *  output_data 
)
inline

Definition at line 1006 of file PALreference_ops.h.

1009{
1010 ruy::profiler::ScopeLabel label("GatherNdString");
1011
1012 const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
1013 DynamicBuffer buffer;
1014 for (int i = 0; i < res.n_slices; ++i)
1015 {
1016 int from_pos = 0;
1017 for (int j = 0; j < res.indices_nd; ++j)
1018 {
1019 from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j];
1020 }
1021 for (int j = 0; j < res.slice_size; ++j)
1022 {
1023 buffer.AddString(GetString(params_data, from_pos + j));
1024 }
1025 }
1026 buffer.WriteToTensor(output_data, /*new_shape=*/nullptr);
1027}

References tflite::reference_ops::GatherNdHelperResult::dims_to_count, GatherNdHelper(), tflite::reference_ops::GatherNdHelperResult::indices_nd, tflite::reference_ops::GatherNdHelperResult::n_slices, and tflite::reference_ops::GatherNdHelperResult::slice_size.

◆ LocalResponseNormalization()

void tflite::reference_ops::LocalResponseNormalization ( const tflite::LocalResponseNormalizationParams &  op_params,
const RuntimeShape &  input_shape,
const float *  input_data,
const RuntimeShape &  output_shape,
float *  output_data 
)
inline

Definition at line 887 of file PALreference_ops.h.

890{
891 const int trailing_dim = input_shape.DimensionsCount() - 1;
892 const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
893 const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
894
895 for (int i = 0; i < outer_size; ++i)
896 {
897 for (int c = 0; c < depth; ++c)
898 {
899 const int begin_input_c = std::max(0, static_cast<int>(c - op_params.range));
900 const int end_input_c = std::min(depth, static_cast<int>(c + op_params.range));
901 float accum = 0.f;
902 for (int input_c = begin_input_c; input_c < end_input_c; ++input_c)
903 {
904 const float input_val = input_data[i * depth + input_c];
905 accum += input_val * input_val;
906 }
907 const float multiplier = std::pow(op_params.bias + op_params.alpha * accum, -op_params.beta);
908 output_data[i * depth + c] = input_data[i * depth + c] * multiplier;
909 }
910 }
911}

References output_shape.
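
In formula form, matching the loops above, each element is scaled by a power of an accumulated sum of squares over a window of neighboring channels:

output[i, c] = input[i, c] * (bias + alpha * sum over c' in [max(0, c - range), min(depth, c + range)) of input[i, c']^2)^(-beta)

Note that the upper bound end_input_c is exclusive, so the window extends only to channel c + range - 1.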

◆ LstmCell() [1/2]

void tflite::reference_ops::LstmCell ( const LstmCellParams &  params,
const RuntimeShape &  unextended_input_shape,
const float *  input_data,
const RuntimeShape &  unextended_prev_activ_shape,
const float *  prev_activ_data,
const RuntimeShape &  weights_shape,
const float *  weights_data,
const RuntimeShape &  unextended_bias_shape,
const float *  bias_data,
const RuntimeShape &  unextended_prev_state_shape,
const float *  prev_state_data,
const RuntimeShape &  unextended_output_state_shape,
float *  output_state_data,
const RuntimeShape &  unextended_output_activ_shape,
float *  output_activ_data,
const RuntimeShape &  unextended_concat_temp_shape,
float *  concat_temp_data,
const RuntimeShape &  unextended_activ_temp_shape,
float *  activ_temp_data 
)
inline

Definition at line 472 of file PALreference_ops.h.

482{
483 TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
484 TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
485 TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
486 TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
487 TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
488 TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
489 TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
490 TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
491 const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
492 const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
493 const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
494 const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
495 const RuntimeShape output_state_shape =
496 RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
497 const RuntimeShape output_activ_shape =
498 RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
499 const RuntimeShape concat_temp_shape =
500 RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
501 const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
502 TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
503
504 const int weights_dim_count = weights_shape.DimensionsCount();
505 const int batches = MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0,
506 output_state_shape, 0, output_activ_shape, 0);
507 const int height = MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1,
508 output_state_shape, 1, output_activ_shape, 1);
509 const int width = MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2,
510 output_state_shape, 2, output_activ_shape, 2);
511 const int input_depth = input_shape.Dims(3);
512 const int prev_activ_depth = prev_activ_shape.Dims(3);
513 const int total_input_depth = prev_activ_depth + input_depth;
514 TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
515 TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
516 const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
517 TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
518 TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
519 const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
520 3, output_activ_shape, 3);
521 TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
522
523 // Concatenate prev_activ and input data together
524 std::vector<float const *> concat_input_arrays_data;
525 std::vector<RuntimeShape const *> concat_input_arrays_shapes;
526 concat_input_arrays_data.push_back(input_data);
527 concat_input_arrays_data.push_back(prev_activ_data);
528 concat_input_arrays_shapes.push_back(&input_shape);
529 concat_input_arrays_shapes.push_back(&prev_activ_shape);
530 tflite::ConcatenationParams concat_params;
531 concat_params.axis = 3;
532 concat_params.inputs_count = concat_input_arrays_data.size();
533 Concatenation(concat_params, &(concat_input_arrays_shapes[0]), &(concat_input_arrays_data[0]),
534 concat_temp_shape, concat_temp_data);
535
536 // Fully connected
537 tflite::FullyConnectedParams fc_params;
538 fc_params.float_activation_min = std::numeric_limits<float>::lowest();
539 fc_params.float_activation_max = std::numeric_limits<float>::max();
540 FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape, weights_data,
541 bias_shape, bias_data, activ_temp_shape, activ_temp_data);
542
543 // Memory state update (the LSTM "guts")
544 for (int b = 0; b < batches; ++b)
545 {
546 for (int w = 0; w < width; ++w)
547 {
548 for (int h = 0; h < height; ++h)
549 {
550 for (int c = 0; c < output_depth; ++c)
551 {
552 const float input_gate =
553 1.f /
554 (1.f +
555 std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 0 * output_depth + c)]));
556 const float new_input =
557 std::tanh(activ_temp_data[Offset(activ_temp_shape, b, h, w, 1 * output_depth + c)]);
558 const float forget_gate =
559 1.f /
560 (1.f +
561 std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 2 * output_depth + c)]));
562 const float output_gate =
563 1.f /
564 (1.f +
565 std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 3 * output_depth + c)]));
566 const float new_state =
567 input_gate * new_input +
568 forget_gate * prev_state_data[Offset(prev_state_shape, b, h, w, c)];
569 output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state;
570 output_activ_data[Offset(output_activ_shape, b, h, w, c)] =
571 output_gate * std::tanh(new_state);
572 }
573 }
574 }
575 }
576}

References Concatenation(), FullyConnected(), and Offset().
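
The innermost loop is the standard LSTM cell update. Writing a0..a3 for the four output_depth-wide chunks of activ_temp along the channel axis:

input gate i = sigmoid(a0), new input g = tanh(a1), forget gate f = sigmoid(a2), output gate o = sigmoid(a3)
new_state = i * g + f * prev_state
output_activ = o * tanh(new_state)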

◆ LstmCell() [2/2]

template<int StateIntegerBits>
void tflite::reference_ops::LstmCell ( const LstmCellParams &  params,
const RuntimeShape &  unextended_input_shape,
const uint8 *  input_data_uint8,
const RuntimeShape &  unextended_prev_activ_shape,
const uint8 *  prev_activ_data_uint8,
const RuntimeShape &  weights_shape,
const uint8 *  weights_data_uint8,
const RuntimeShape &  unextended_bias_shape,
const int32 *  bias_data_int32,
const RuntimeShape &  unextended_prev_state_shape,
const int16 *  prev_state_data_int16,
const RuntimeShape &  unextended_output_state_shape,
int16 *  output_state_data_int16,
const RuntimeShape &  unextended_output_activ_shape,
uint8 *  output_activ_data_uint8,
const RuntimeShape &  unextended_concat_temp_shape,
uint8 *  concat_temp_data_uint8,
const RuntimeShape &  unextended_activ_temp_shape,
int16 *  activ_temp_data_int16,
void *  gemmlowp_context 
)
inline

Definition at line 664 of file PALreference_ops.h.

674{
675 (void)gemmlowp_context; // only used in optimized code.
676 int32 weights_zero_point = params.weights_zero_point;
677 int32 accum_multiplier = params.accum_multiplier;
678 int accum_shift = params.accum_shift;
679 TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
680 TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
681 TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
682 TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
683 TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
684 TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
685 TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
686 TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
687 const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
688 const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
689 const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
690 const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
691 const RuntimeShape output_state_shape =
692 RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
693 const RuntimeShape output_activ_shape =
694 RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
695 const RuntimeShape concat_temp_shape =
696 RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
697 const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
698 TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
699
700 // Gather dimensions information, and perform consistency checks.
701 const int weights_dim_count = weights_shape.DimensionsCount();
702 const int outer_size = MatchingFlatSizeSkipDim(input_shape, 3, prev_activ_shape, prev_state_shape,
703 output_state_shape, output_activ_shape);
704 const int input_depth = input_shape.Dims(3);
705 const int prev_activ_depth = prev_activ_shape.Dims(3);
706 const int total_input_depth = prev_activ_depth + input_depth;
707 TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
708 const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
709 TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
710 TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
711 TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
712 const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
713 3, output_activ_shape, 3);
714 TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
715 const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3);
716 const int fc_output_depth =
717 MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3);
718 const int fc_accum_depth = total_input_depth;
719 TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth);
720
721 // Depth-concatenate prev_activ and input data together.
722 uint8 const *concat_input_arrays_data[2] = {input_data_uint8, prev_activ_data_uint8};
723 const RuntimeShape *concat_input_arrays_shapes[2] = {&input_shape, &prev_activ_shape};
724 tflite::ConcatenationParams concat_params;
725 concat_params.axis = 3;
726 concat_params.inputs_count = 2;
727 Concatenation(concat_params, concat_input_arrays_shapes, concat_input_arrays_data,
728 concat_temp_shape, concat_temp_data_uint8);
729
730 // Implementation of the fully connected node inside the LSTM cell.
731 // The operands are 8-bit integers, the accumulators are internally 32bit
732 // integers, and the output is 16-bit fixed-point with 3 integer bits so
733 // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that
734 // is explained in the function comment above.
735 for (int b = 0; b < fc_batches; ++b)
736 {
737 for (int out_c = 0; out_c < fc_output_depth; ++out_c)
738 {
739 // Internal accumulation.
740 // Initialize accumulator with the bias-value.
741 int32 accum = bias_data_int32[out_c];
742 // Accumulation loop.
743 for (int d = 0; d < fc_accum_depth; ++d)
744 {
745 int16 input_val = concat_temp_data_uint8[b * fc_accum_depth + d] - 128;
746 int16 weights_val = weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point;
747 accum += input_val * weights_val;
748 }
749 // Down-scale the final int32 accumulator to the scale used by our
750 // (16-bit, using 3 integer bits) fixed-point format. The quantized
751 // multiplier and shift here have been pre-computed offline
752 // (e.g. by toco).
753 accum = MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift);
754 // Saturate, cast to int16, and store to the temporary activations array.
755 accum = std::max(-32768, std::min(32767, static_cast<int>(accum)));
756 activ_temp_data_int16[out_c + fc_output_depth * b] = accum;
757 }
758 }
759
760 // Rest of the LSTM cell: tanh and logistic math functions, and some adds
761 // and muls, all done in 16-bit fixed-point.
762 for (int b = 0; b < outer_size; ++b)
763 {
764 for (int c = 0; c < output_depth; ++c)
765 {
766 // Define the fixed-point data types that we will use here. All use
767 // int16 as the underlying integer type i.e. all are 16-bit fixed-point.
768 // They only differ by the number of integral vs. fractional bits,
769 // determining the range of values that they can represent.
770 //
771 // F0 uses 0 integer bits, range [-1, 1].
772 // This is the return type of math functions such as tanh, logistic,
773 // whose range is in [-1, 1].
774 using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
775 // F3 uses 3 integer bits, range [-8, 8].
776 // This is the range of the previous fully-connected node's output,
777 // which is our input here.
778 using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
779 // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits,
780 // 2^StateIntegerBits]. It's used to represent the internal state, whose
781 // number of integer bits is currently dictated by the model. See comment
782 // on the StateIntegerBits template parameter above.
783 using FS = gemmlowp::FixedPoint<std::int16_t, StateIntegerBits>;
784 // Implementation of input gate, using fixed-point logistic function.
785 F3 input_gate_input =
786 F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]);
787 F0 input_gate_output = gemmlowp::logistic(input_gate_input);
788 // Implementation of input modulation gate, using fixed-point tanh
789 // function.
790 F3 input_modulation_gate_input =
791 F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]);
792 F0 input_modulation_gate_output = gemmlowp::tanh(input_modulation_gate_input);
793 // Implementation of forget gate, using fixed-point logistic function.
794 F3 forget_gate_input =
795 F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]);
796 F0 forget_gate_output = gemmlowp::logistic(forget_gate_input);
797 // Implementation of output gate, using fixed-point logistic function.
798 F3 output_gate_input =
799 F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]);
800 F0 output_gate_output = gemmlowp::logistic(output_gate_input);
801 // Implementation of internal multiplication nodes, still in fixed-point.
802 F0 input_times_input_modulation = input_gate_output * input_modulation_gate_output;
803 FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]);
804 FS prev_state_times_forget_state = forget_gate_output * prev_state;
805 // Implementation of internal addition node, saturating.
806 FS new_state =
807 gemmlowp::SaturatingAdd(gemmlowp::Rescale<StateIntegerBits>(input_times_input_modulation),
808 prev_state_times_forget_state);
809 // Implementation of last internal Tanh node, still in fixed-point.
810 // Since a Tanh fixed-point implementation is specialized for a given
811 // number or integer bits, and each specialization can have a substantial
812 // code size, and we already used above a Tanh on an input with 3 integer
813 // bits, and per the table in the above function comment there is no
814 // significant accuracy to be lost by clamping to [-8, +8] for a
815 // 3-integer-bits representation, let us just do that. This helps people
816 // porting this to targets where code footprint must be minimized.
817 F3 new_state_f3 = gemmlowp::Rescale<3>(new_state);
818 F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3);
819 // Store the new internal state back to memory, as 16-bit integers.
820 // Note: here we store the original value with StateIntegerBits, not
821 // the rescaled 3-integer-bits value fed to tanh.
822 output_state_data_int16[b * output_depth + c] = new_state.raw();
823 // Down-scale the output activations to 8-bit integers, saturating,
824 // and store back to memory.
825 int16 rescaled_output_activ = gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8);
826 int16 clamped_output_activ =
827 std::max<int16>(-128, std::min<int16>(127, rescaled_output_activ));
828 output_activ_data_uint8[b * output_depth + c] = 128 + clamped_output_activ;
829 }
830 }
831}

References Concatenation().

◆ Maximum() [1/2]

template<typename T >
void tflite::reference_ops::Maximum ( const RuntimeShape &  input1_shape,
const T *  input1_data,
const RuntimeShape &  ,
const T *  input2_data,
const RuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 1171 of file PALreference_ops.h.

1173{
1174 // Drop shape of second input: not needed.
1175 Maximum(input1_shape, input1_data, input2_data, output_shape, output_data);
1176}

References Maximum(), and output_shape.

◆ Maximum() [2/2]

template<typename T >
void tflite::reference_ops::Maximum ( const RuntimeShape &  input1_shape,
const T *  input1_data,
const T *  input2_data,
const RuntimeShape &  output_shape,
T *  output_data 
)

Definition at line 1156 of file PALreference_ops.h.

1158{
1159 const int flat_size = MatchingFlatSize(input1_shape, output_shape);
1160
1161 auto max_value = input2_data[0];
1162 for (int i = 0; i < flat_size; i++)
1163 {
1164 output_data[i] = input1_data[i] < max_value ? max_value : input1_data[i];
1165 }
1166}

References MatchingFlatSize(), and output_shape.

Referenced by Maximum().
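
Note that only input2_data[0] is read: this computes an elementwise maximum against a scalar second operand, not a general elementwise maximum of two equally shaped tensors. For example, input1 = {-1, 3, 7} with input2_data[0] = 2 yields {2, 3, 7}.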

◆ Minimum() [1/2]

template<typename T >
void tflite::reference_ops::Minimum ( const RuntimeShape &  input1_shape,
const T *  input1_data,
const RuntimeShape &  ,
const T *  input2_data,
const RuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 1148 of file PALreference_ops.h.

1150{
1151 // Drop shape of second input: not needed.
1152 Minimum(input1_shape, input1_data, input2_data, output_shape, output_data);
1153}

References Minimum(), and output_shape.

◆ Minimum() [2/2]

template<typename T >
void tflite::reference_ops::Minimum ( const RuntimeShape &  input1_shape,
const T *  input1_data,
const T *  input2_data,
const RuntimeShape &  output_shape,
T *  output_data 
)

Definition at line 1133 of file PALreference_ops.h.

1135{
1136 const int flat_size = MatchingFlatSize(input1_shape, output_shape);
1137
1138 auto min_value = input2_data[0];
1139 for (int i = 0; i < flat_size; i++)
1140 {
1141 output_data[i] = input1_data[i] > min_value ? min_value : input1_data[i];
1142 }
1143}

References MatchingFlatSize(), and output_shape.

Referenced by Minimum().
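
As with Maximum() above, only input2_data[0] is read, so this clamps every element from above by a scalar: input1 = {-1, 3, 7} with input2_data[0] = 2 yields {-1, 2, 2}.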

◆ Mul() [1/2]

void tflite::reference_ops::Mul ( const ArithmeticParams &  params,
const RuntimeShape &  input1_shape,
const int16 *  input1_data,
const RuntimeShape &  input2_shape,
const int16 *  input2_data,
const RuntimeShape &  output_shape,
int16 *  output_data 
)
inline

Definition at line 242 of file PALreference_ops.h.

245{
246 ruy::profiler::ScopeLabel label("Mul/Int16");
247
248 const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
249
250 for (int i = 0; i < flat_size; i++)
251 {
252 // F0 uses 0 integer bits, range [-1, 1].
253 using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
254
255 F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
256 output_data[i] = unclamped_result.raw();
257 }
258}

References output_shape.
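
Inputs and output are interpreted as Q0.15 fixed-point values (F0: zero integer bits, range [-1, 1]), and the F0 product uses gemmlowp's saturating rounding doubling high multiply. Worked example: 0.5 * 0.5 corresponds to raw values 16384 * 16384, giving (2 * 2^28) >> 16 = 8192, which is 0.25 in Q0.15.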

◆ Mul() [2/2]

void tflite::reference_ops::Mul ( const ArithmeticParams &  params,
const RuntimeShape &  input1_shape,
const int16 *  input1_data,
const RuntimeShape &  input2_shape,
const int16 *  input2_data,
const RuntimeShape &  output_shape,
uint8 *  output_data 
)
inline

Definition at line 260 of file PALreference_ops.h.

263{
264 ruy::profiler::ScopeLabel label("Mul/Int16Uint8");
265 int32 output_offset = params.output_offset;
266 int32 output_activation_min = params.quantized_activation_min;
267 int32 output_activation_max = params.quantized_activation_max;
268 TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
269
270 const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
271
272 for (int i = 0; i < flat_size; i++)
273 {
274 // F0 uses 0 integer bits, range [-1, 1].
275 using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
276
277 F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
278 int16 rescaled_result = gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
279 int16 clamped_result = std::min<int16>(output_activation_max - output_offset, rescaled_result);
280 clamped_result = std::max<int16>(output_activation_min - output_offset, clamped_result);
281 output_data[i] = output_offset + clamped_result;
282 }
283}

References output_shape.
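
The Q0.15 product is narrowed for the uint8 output: RoundingDivideByPOT(raw, 8) rescales the 16-bit result into roughly [-128, 128], the value is clamped against the activation bounds expressed relative to output_offset, and output_offset is added back on store, which is why the bounds are offset-subtracted before the min/max.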

◆ NodeOffset()

int tflite::reference_ops::NodeOffset ( int  b,
int  h,
int  w,
int  height,
int  width 
)
inline

Definition at line 882 of file PALreference_ops.h.

883{
884 return (b * height + h) * width + w;
885}
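
This is the flat offset of node (b, h, w) in a [batches, height, width] layout; for example, NodeOffset(1, 2, 3, 4, 5) = (1 * 4 + 2) * 5 + 3 = 33.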

◆ Pack()

template<typename Scalar >
void tflite::reference_ops::Pack ( const PackParams &  params,
const RuntimeShape *const *  input_shapes,
const Scalar *const *  input_data,
const RuntimeShape &  output_shape,
Scalar *  output_data 
)

Definition at line 338 of file PALreference_ops.h.

340{
341 ruy::profiler::ScopeLabel label("Pack");
342 const int dimensions = output_shape.DimensionsCount();
343 int axis = params.axis;
344 int inputs_count = params.inputs_count;
345
346 int outer_size = 1;
347 for (int i = 0; i < axis; i++)
348 {
349 outer_size *= output_shape.Dims(i);
350 }
351 int copy_size = 1;
352 for (int i = params.axis + 1; i < dimensions; i++)
353 {
354 copy_size *= output_shape.Dims(i);
355 }
356 TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
357
358 for (int i = 0; i < inputs_count; ++i)
359 {
360 for (int k = 0; k < outer_size; k++)
361 {
362 const Scalar *input_ptr = input_data[i] + copy_size * k;
363 int loc = k * inputs_count * copy_size + i * copy_size;
364 memcpy(output_data + loc, input_ptr, copy_size * sizeof(Scalar));
365 }
366 }
367}

References output_shape.
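
Example (illustrative values): packing inputs_count = 2 tensors of shape [2, 3] along axis = 1 produces an output of shape [2, 2, 3]. Here outer_size = 2 and copy_size = 3, so row k of input i (3 elements) is copied to output offset k * 2 * 3 + i * 3.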

◆ PackWithScaling()

template<typename Scalar >
void tflite::reference_ops::PackWithScaling ( const PackParams &  params,
const RuntimeShape *const *  input_shapes,
const uint8 *const *  input_data,
const RuntimeShape &  output_shape,
uint8 *  output_data 
)

Definition at line 408 of file PALreference_ops.h.

411{
412 ruy::profiler::ScopeLabel label("PackWithScaling");
413 const int dimensions = output_shape.DimensionsCount();
414 int axis = params.axis;
415 const int32 *input_zeropoint = params.input_zeropoint;
416 const float *input_scale = params.input_scale;
417 int inputs_count = params.inputs_count;
418 const int32 output_zeropoint = params.output_zeropoint;
419 const float output_scale = params.output_scale;
420
421 int outer_size = 1;
422 for (int i = 0; i < axis; i++)
423 {
424 outer_size *= output_shape.Dims(i);
425 }
426 int copy_size = 1;
427 for (int i = axis + 1; i < dimensions; i++)
428 {
429 copy_size *= output_shape.Dims(i);
430 }
431 TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
432
433 Scalar *output_ptr = output_data;
434 const float inverse_output_scale = 1.f / output_scale;
435 for (int k = 0; k < outer_size; k++)
436 {
437 for (int i = 0; i < inputs_count; ++i)
438 {
439 if (input_zeropoint[i] == output_zeropoint && input_scale[i] == output_scale)
440 {
441 memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar));
442 }
443 else
444 {
445 assert(false);
446 const float scale = input_scale[i] * inverse_output_scale;
447 const float bias = -input_zeropoint[i] * scale;
448 auto input_ptr = input_data[i];
449 for (int j = 0; j < copy_size; ++j)
450 {
451 const int value =
452 static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
453 output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
454 }
455 }
456 output_ptr += copy_size;
457 }
458 }
459}

References output_shape.
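
Note that the requantization branch (an input whose scale or zero point differs from the output's) starts with assert(false), so this PAL implementation effectively supports only the pass-through memcpy path where input and output quantization match; the rescaling code after the assert shows the intended general behavior.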

◆ Pow()

template<typename T >
void tflite::reference_ops::Pow ( const RuntimeShape &  input1_shape,
const T *  input1_data,
const RuntimeShape &  input2_shape,
const T *  input2_data,
const RuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 1376 of file PALreference_ops.h.

1379{
1380 const int flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
1381 for (int i = 0; i < flat_size; ++i)
1382 {
1383 output_data[i] = std::pow(input1_data[i], input2_data[i]);
1384 }
1385}

References MatchingFlatSize(), and output_shape.

◆ RankOneSelect()

template<typename D , typename T >
void tflite::reference_ops::RankOneSelect ( const RuntimeShape &  input_condition_shape,
const D *  input_condition_data,
const RuntimeShape &  input_x_shape,
const T *  input_x_data,
const RuntimeShape &  input_y_shape,
const T *  input_y_data,
const RuntimeShape &  output_shape,
T *  output_data 
)

Definition at line 1221 of file PALreference_ops.h.

1225{
1226 const int64_t outer_size = input_condition_shape.FlatSize();
1227 int64_t inner_size;
1228 if (input_condition_shape.DimensionsCount() == 0)
1229 {
1230 inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape);
1231 }
1232 else
1233 {
1234 TFLITE_DCHECK_EQ(MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0), outer_size);
1235 inner_size = MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape);
1236 }
1237
1238 int64_t offset = 0;
1239 for (int64_t i = 0; i < outer_size; i++)
1240 {
1241 const T *input_data = input_condition_data[i] ? input_x_data : input_y_data;
1242 memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T));
1243 offset += inner_size;
1244 }
1245}

References MatchingFlatSize(), offset(), and output_shape.
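
Example (illustrative values): with a rank-1 condition {1, 0} and x, y of shape [2, 3], outer_size = 2 and inner_size = 3, so row 0 is copied from x and row 1 from y. With a rank-0 (scalar) condition, the single condition value selects the entire x or y tensor in one copy.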

◆ Relu()

template<typename T >
void tflite::reference_ops::Relu ( const RuntimeShape &  input_shape,
const T *  input_data,
const RuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 109 of file PALreference_ops.h.

111{
112 const int flat_size = MatchingFlatSize(input_shape, output_shape);
113 for (int i = 0; i < flat_size; ++i)
114 {
115 const T val = input_data[i];
116 const T lower = 0;
117 const T clamped = val < lower ? lower : val;
118 output_data[i] = clamped;
119 }
120}

References MatchingFlatSize(), and output_shape.

◆ Relu1()

template<typename T >
void tflite::reference_ops::Relu1 ( const RuntimeShape &  input_shape,
const T *  input_data,
const RuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 123 of file PALreference_ops.h.

125{
126 ruy::profiler::ScopeLabel label("Relu1 (not fused)");
127 const int flat_size = MatchingFlatSize(input_shape, output_shape);
128 for (int i = 0; i < flat_size; ++i)
129 {
130 const T val = input_data[i];
131 const T upper = 1;
132 const T lower = -1;
133 const T clamped = val > upper ? upper : val < lower ? lower : val;
134 output_data[i] = clamped;
135 }
136}

References MatchingFlatSize(), and output_shape.

◆ Relu6()

void tflite::reference_ops::Relu6 ( const RuntimeShape &  input_shape,
const float *  input_data,
const RuntimeShape &  output_shape,
float *  output_data 
)
inline

Definition at line 138 of file PALreference_ops.h.

140{
141 ruy::profiler::ScopeLabel label("Relu6 (not fused)");
142 const int flat_size = MatchingFlatSize(input_shape, output_shape);
143 for (int i = 0; i < flat_size; ++i)
144 {
145 const float val = input_data[i];
146 const float upper = 6;
147 const float lower = 0;
148 const float clamped = val > upper ? upper : val < lower ? lower : val;
149 output_data[i] = clamped;
150 }
151}

References MatchingFlatSize(), and output_shape.

◆ ReluX() [1/2]

template<typename T >
void tflite::reference_ops::ReluX ( const tflite::ActivationParams &  params,
const RuntimeShape &  input_shape,
const T *  input_data,
const RuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 172 of file PALreference_ops.h.

174{
175 ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
176 const int flat_size = MatchingFlatSize(input_shape, output_shape);
177 const T max_value = params.quantized_activation_max;
178 const T min_value = params.quantized_activation_min;
179 for (int i = 0; i < flat_size; ++i)
180 {
181 const T val = input_data[i];
182 const T clamped = val > max_value ? max_value : val < min_value ? min_value : val;
183 output_data[i] = clamped;
184 }
185}

References MatchingFlatSize(), and output_shape.

◆ ReluX() [2/2]

template<typename T >
void tflite::reference_ops::ReluX ( const tflite::ReluParams &  params,
const RuntimeShape &  input_shape,
const T *  input_data,
const RuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 154 of file PALreference_ops.h.

156{
157 ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
158 const int flat_size = MatchingFlatSize(input_shape, output_shape);
159 for (int i = 0; i < flat_size; ++i)
160 {
161 const int32 val = static_cast<int32_t>(input_data[i]);
162 int32 clamped = params.output_offset + MultiplyByQuantizedMultiplier(val - params.input_offset,
163 params.output_multiplier,
164 params.output_shift);
165 clamped = std::max(params.quantized_activation_min, clamped);
166 clamped = std::min(params.quantized_activation_max, clamped);
167 output_data[i] = static_cast<T>(clamped);
168 }
169}

References MatchingFlatSize(), and output_shape.

◆ Reverse()

template<typename Scalar >
void tflite::reference_ops::Reverse ( int  axis,
const RuntimeShape &  input_shape,
const Scalar *  input_data,
const RuntimeShape &  output_shape,
Scalar *  output_data 
)

Definition at line 1423 of file PALreference_ops.h.

1425{
1426 ruy::profiler::ScopeLabel label("Reverse");
1427
1428 int outer_size = 1;
1429 for (int i = 0; i < axis; ++i)
1430 {
1431 outer_size *= input_shape.Dims(i);
1432 }
1433
1434 int copy_size = 1;
1435 for (int i = axis + 1; i < input_shape.DimensionsCount(); ++i)
1436 {
1437 copy_size *= input_shape.Dims(i);
1438 }
1439
1440 const int dims_at_axis = input_shape.Dims(axis);
1441 for (int i = 0; i < outer_size; ++i)
1442 {
1443 for (int j = 0; j < dims_at_axis; ++j)
1444 {
1445 const int start_pos = (i * dims_at_axis + j) * copy_size;
1446 Scalar *output_ptr = output_data + start_pos;
1447 int loc = (i * dims_at_axis + dims_at_axis - j - 1) * copy_size;
1448 memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
1449 }
1450 }
1451}
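
Example (illustrative values): reversing axis 0 of a [3, 2] tensor {1, 2, 3, 4, 5, 6} yields {5, 6, 3, 4, 1, 2}; here outer_size = 1, copy_size = 2, and each 2-element output row j is filled from source row dims_at_axis - j - 1.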

◆ ReverseSequence()

template<typename Scalar , typename TS >
void tflite::reference_ops::ReverseSequence ( const TS *  seq_lengths,
const int  seq_dim,
const int  batch_dim,
const RuntimeShape &  input_shape,
const Scalar *  input_data,
const RuntimeShape &  output_shape,
Scalar *  output_data 
)

Definition at line 1454 of file PALreference_ops.h.

1457{
1458 ruy::profiler::ScopeLabel label("ReverseSequence");
1459
1460 int outer_size = 1;
1461 int outer_dim = std::min(batch_dim, seq_dim);
1462 int medium_dim = std::max(batch_dim, seq_dim);
1463 for (int i = 0; i < outer_dim; ++i)
1464 {
1465 outer_size *= input_shape.Dims(i);
1466 }
1467
1468 int medium_size = 1;
1469 for (int i = outer_dim + 1; i < medium_dim; ++i)
1470 {
1471 medium_size *= input_shape.Dims(i);
1472 }
1473
1474 int copy_size = 1;
1475 for (int i = medium_dim + 1; i < input_shape.DimensionsCount(); ++i)
1476 {
1477 copy_size *= input_shape.Dims(i);
1478 }
1479
1480 const int dims_at_outer_dim = input_shape.Dims(outer_dim);
1481 const int dims_at_medium_dim = input_shape.Dims(medium_dim);
1482
1483 Scalar *output_ptr;
1484 if (batch_dim > seq_dim)
1485 {
1486 for (int i = 0; i < outer_size; ++i)
1487 {
1488 for (int j = 0; j < dims_at_outer_dim; ++j)
1489 {
1490 const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
1491 for (int p = 0; p < medium_size; ++p)
1492 {
1493 for (int q = 0; q < dims_at_medium_dim; ++q)
1494 {
1495 const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
1496 const Scalar *in_ptr = input_data + in_pos;
1497 int sl = seq_lengths[q] - 1;
1498 if (j > sl)
1499 {
1500 output_ptr = output_data + in_pos;
1501 }
1502 else
1503 {
1504 const int out_pos_base = (i * dims_at_outer_dim + sl - j) * medium_size;
1505 const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + q) * copy_size;
1506 output_ptr = output_data + out_pos;
1507 }
1508 memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
1509 }
1510 }
1511 }
1512 }
1513 }
1514 else if (batch_dim < seq_dim)
1515 {
1516 for (int i = 0; i < outer_size; ++i)
1517 {
1518 for (int j = 0; j < dims_at_outer_dim; ++j)
1519 {
1520 const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
1521 int sl = seq_lengths[j] - 1;
1522 const int out_pos_base = (i * dims_at_outer_dim + j) * medium_size;
1523 for (int p = 0; p < medium_size; ++p)
1524 {
1525 for (int q = 0; q < dims_at_medium_dim; ++q)
1526 {
1527 const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
1528 const Scalar *in_ptr = input_data + in_pos;
1529 if (q > sl)
1530 {
1531 output_ptr = output_data + in_pos;
1532 }
1533 else
1534 {
1535 const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + sl - q) * copy_size;
1536 output_ptr = output_data + out_pos;
1537 }
1538 memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
1539 }
1540 }
1541 }
1542 }
1543 }
1544}
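
For each entry along batch_dim, the first seq_lengths[batch] elements along seq_dim are reversed and the remainder copied through unchanged. A minimal sketch on a 2x3 tensor with batch_dim 0 and seq_dim 1 (hypothetical buffers):

const tflite::RuntimeShape shape({2, 3});
const float input[] = {1, 2, 3, 4, 5, 6};
const int32_t seq_lengths[] = {2, 3}; // reverse 2 elements in row 0, all 3 in row 1
float output[6];
tflite::reference_ops::ReverseSequence(seq_lengths, /*seq_dim=*/1, /*batch_dim=*/0,
                                       shape, input, shape, output);
// output == {2, 1, 3, 6, 5, 4}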

◆ ScatterNd()

template<typename IndicesT , typename UpdatesT >
void tflite::reference_ops::ScatterNd ( const RuntimeShape &  indices_shape,
const IndicesT *  indices_data,
const RuntimeShape &  updates_shape,
const UpdatesT *  updates_data,
const RuntimeShape &  output_shape,
UpdatesT *  output_data 
)
inline

Definition at line 1031 of file PALreference_ops.h.

1034{
1035 ruy::profiler::ScopeLabel label("ScatterNd");
1036
1037 int n_slices = 1;
1038 int slice_size = 1;
1039 const int outer_dims = indices_shape.DimensionsCount() - 1;
1040 const int indices_nd = indices_shape.Dims(outer_dims);
1041 const int updates_dims = updates_shape.DimensionsCount();
1042 for (int i = 0; i < outer_dims; ++i)
1043 {
1044 n_slices *= indices_shape.Dims(i);
1045 }
1046 for (int i = outer_dims; i < updates_dims; ++i)
1047 {
1048 slice_size *= updates_shape.Dims(i);
1049 }
1050
1051 int output_flat_size = output_shape.FlatSize();
1052 int remain_flat_size = output_flat_size;
1053 std::vector<int> dims_to_count(indices_nd, 0);
1054 for (int i = 0; i < indices_nd; ++i)
1055 {
1056 dims_to_count[i] = remain_flat_size / output_shape.Dims(i);
1057 remain_flat_size = dims_to_count[i];
1058 }
1059
1060 memset(output_data, 0, sizeof(UpdatesT) * output_flat_size);
1061 for (int i = 0; i < n_slices; ++i)
1062 {
1063 int to_pos = 0;
1064 for (int j = 0; j < indices_nd; ++j)
1065 {
1066 IndicesT idx = indices_data[i * indices_nd + j];
1067 TFLITE_DCHECK(0 <= idx && idx < output_shape.Dims(j));
1068 to_pos += idx * dims_to_count[j];
1069 }
1070 for (int j = 0; j < slice_size; j++)
1071 {
1072 output_data[to_pos + j] += updates_data[i * slice_size + j];
1073 }
1074 }
1075}

References output_shape.
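
ScatterNd zero-fills the output and then accumulates each updates slice at the offset named by the corresponding index vector. A minimal sketch scattering two scalars into a length-4 vector (hypothetical buffers):

const tflite::RuntimeShape indices_shape({2, 1}); // two rank-1 index vectors
const int32_t indices[] = {0, 2};
const tflite::RuntimeShape updates_shape({2});
const float updates[] = {10.f, 20.f};
const tflite::RuntimeShape output_shape({4});
float output[4];
tflite::reference_ops::ScatterNd(indices_shape, indices, updates_shape, updates,
                                 output_shape, output);
// output == {10, 0, 20, 0}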

◆ SegmentSum()

template<typename T >
void tflite::reference_ops::SegmentSum ( const RuntimeShape &  input_shape,
const T *  input_data,
const RuntimeShape &  segment_ids_shape,
const int32_t *  segment_ids_data,
const RuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 1547 of file PALreference_ops.h.

1550{
1551 const int segment_flat_size = MatchingFlatSizeSkipDim(input_shape, 0, output_shape);
1552
1553 memset(output_data, 0, sizeof(T) * output_shape.FlatSize());
1554
1555 for (int i = 0; i < input_shape.Dims(0); i++)
1556 {
1557 int output_index = segment_ids_data[i];
1558 for (int j = 0; j < segment_flat_size; ++j)
1559 {
1560 output_data[output_index * segment_flat_size + j] += input_data[i * segment_flat_size + j];
1561 }
1562 }
1563}

References output_shape.
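
SegmentSum accumulates input rows that share a segment id; the output's first dimension must cover the largest id. A minimal sketch (hypothetical buffers):

const tflite::RuntimeShape input_shape({3, 2});
const float input[] = {1, 2, 3, 4, 5, 6};
const tflite::RuntimeShape ids_shape({3});
const int32_t segment_ids[] = {0, 0, 1}; // rows 0 and 1 go to segment 0
const tflite::RuntimeShape output_shape({2, 2});
float output[4];
tflite::reference_ops::SegmentSum(input_shape, input, ids_shape, segment_ids,
                                  output_shape, output);
// output == {4, 6, 5, 6}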

◆ Select()

template<typename D , typename T >
void tflite::reference_ops::Select ( const RuntimeShape &  input_condition_shape,
const D *  input_condition_data,
const RuntimeShape &  input_x_shape,
const T *  input_x_data,
const RuntimeShape &  input_y_shape,
const T *  input_y_data,
const RuntimeShape &  output_shape,
T *  output_data 
)

Definition at line 1197 of file PALreference_ops.h.

1201{
1202 int64_t flatsize;
1203 // Allow the select operator to execute on a mix of scalar and
1204 // one-element tensors.
1205 if (input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 &&
1206 input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1)
1207 {
1208 flatsize = 1;
1209 }
1210 else
1211 {
1212 flatsize = MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape);
1213 }
1214 for (int64_t i = 0; i < flatsize; ++i)
1215 {
1216 output_data[i] = input_condition_data[i] ? input_x_data[i] : input_y_data[i];
1217 }
1218}

References MatchingFlatSize(), and output_shape.
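
A minimal element-wise sketch (hypothetical buffers; all shapes must match unless every tensor holds a single element):

const tflite::RuntimeShape shape({3});
const bool condition[] = {true, false, true};
const float x[] = {1, 2, 3};
const float y[] = {10, 20, 30};
float output[3];
tflite::reference_ops::Select(shape, condition, shape, x, shape, y, shape, output);
// output == {1, 20, 3}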

◆ SelectTrueCoords()

template<typename D , typename T >
void tflite::reference_ops::SelectTrueCoords ( const RuntimeShape &  input_condition_shape,
const D *  input_condition_data,
T *  output_data 
)

Definition at line 1297 of file PALreference_ops.h.

1299{
1300 const size_t size = input_condition_shape.FlatSize();
1301 if (size == 0)
1302 {
1303 // The condition tensor is empty, so there is nothing to output.
1304 return;
1305 }
1306 const size_t cond_rank = input_condition_shape.DimensionsCount();
1307
1308 std::vector<int> dims_to_count(cond_rank, 0);
1309 int cur_flat_size = size;
1310 for (int i = 0; i < cond_rank; ++i)
1311 {
1312 dims_to_count[i] = cur_flat_size / input_condition_shape.Dims(i);
1313 cur_flat_size = dims_to_count[i];
1314 }
1315
1316 int output_index = 0;
1317 for (int i = 0; i < size; ++i)
1318 {
1319 if (input_condition_data[i])
1320 {
1321 // Insert the coordinate of the current item (row major) into output.
1322 int flat_index = i;
1323 for (int j = 0; j < cond_rank; ++j)
1324 {
1325 int coord_j = flat_index / dims_to_count[j];
1326 output_data[output_index * cond_rank + j] = coord_j;
1327 flat_index %= dims_to_count[j];
1328 }
1329 output_index++;
1330 }
1331 }
1332}
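
SelectTrueCoords writes the row-major coordinates of every true element; the caller must provide room for num_true * rank entries. A minimal sketch (hypothetical buffers):

const tflite::RuntimeShape cond_shape({2, 2});
const bool condition[] = {true, false, false, true};
int32_t coords[4]; // 2 true elements x rank 2
tflite::reference_ops::SelectTrueCoords(cond_shape, condition, coords);
// coords == {0, 0, 1, 1}: elements (0,0) and (1,1) are true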

◆ Slice() [1/3]

template<typename T >
void tflite::reference_ops::Slice ( const tflite::SliceParams &  op_params,
const RuntimeShape &  input_shape,
const RuntimeShape &  output_shape,
SequentialTensorWriter< T > *  writer 
)
inline

Definition at line 1078 of file PALreference_ops.h.

1080{
1081 const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape);
1082 TFLITE_DCHECK_LE(op_params.begin_count, 5);
1083 TFLITE_DCHECK_LE(op_params.size_count, 5);
1084 const int begin_count = op_params.begin_count;
1085 const int size_count = op_params.size_count;
1086 // We front-pad the begin and size vectors.
1087 std::array<int, 5> start;
1088 std::array<int, 5> stop;
1089 for (int i = 0; i < 5; ++i)
1090 {
1091 int padded_i = 5 - i;
1092 start[i] = begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i];
1093 stop[i] = (size_count < padded_i || op_params.size[size_count - padded_i] == -1)
1094 ? ext_shape.Dims(i)
1095 : start[i] + op_params.size[size_count - padded_i];
1096 }
1097
1098 for (int i0 = start[0]; i0 < stop[0]; ++i0)
1099 {
1100 for (int i1 = start[1]; i1 < stop[1]; ++i1)
1101 {
1102 for (int i2 = start[2]; i2 < stop[2]; ++i2)
1103 {
1104 for (int i3 = start[3]; i3 < stop[3]; ++i3)
1105 {
1106 for (int i4 = start[4]; i4 < stop[4]; ++i4)
1107 {
1108 writer->Write(Offset(ext_shape, i0, i1, i2, i3, i4));
1109 }
1110 }
1111 }
1112 }
1113 }
1114}

References Offset().

Referenced by Slice(), and Slice().

◆ Slice() [2/3]

template<typename T >
void tflite::reference_ops::Slice ( const tflite::SliceParams &  op_params,
const RuntimeShape &  input_shape,
const T *  input_data,
const RuntimeShape &  output_shape,
T *  output_data 
)
inline

Definition at line 1117 of file PALreference_ops.h.

1119{
1120 SequentialTensorWriter<T> writer(input_data, output_data);
1121 return Slice(op_params, input_shape, output_shape, &writer);
1122}

References output_shape, and Slice().
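
begin and size are right-aligned against the shape after front-padding to five dimensions, and a size of -1 means "to the end of that axis". A minimal 1-D sketch (hypothetical values; assumes SliceParams carries begin/size arrays plus their counts, as in upstream TensorFlow Lite):

tflite::SliceParams params{};
params.begin_count = 1;
params.begin[0] = 1; // start at element 1
params.size_count = 1;
params.size[0] = 3;  // take three elements
const tflite::RuntimeShape input_shape({5});
const float input[] = {0, 1, 2, 3, 4};
const tflite::RuntimeShape output_shape({3});
float output[3];
tflite::reference_ops::Slice(params, input_shape, input, output_shape, output);
// output == {1, 2, 3}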

◆ Slice() [3/3]

template<typename T >
void tflite::reference_ops::Slice ( const tflite::SliceParams &  op_params,
const RuntimeShape &  input_shape,
const TfLiteTensor *  input,
const RuntimeShape &  output_shape,
TfLiteTensor *  output 
)
inline

Definition at line 1125 of file PALreference_ops.h.

1127{
1128 SequentialTensorWriter<T> writer(input, output);
1129 return Slice(op_params, input_shape, output_shape, &writer);
1130}

References output_shape, and Slice().

◆ SparseToDense()

template<typename T , typename TI >
void tflite::reference_ops::SparseToDense ( const std::vector< std::vector< TI > > &  indices,
const T *  values,
T  default_value,
bool  value_is_scalar,
const RuntimeShape &  unextended_output_shape,
T *  output_data 
)
inline

Definition at line 1336 of file PALreference_ops.h.

1339{
1340 TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
1341 const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
1342 const int value_count = indices.size();
1343
1344 // First fill the output_data with default value.
1345 const int num_elements = output_shape.FlatSize();
1346 for (int i = 0; i < num_elements; ++i)
1347 {
1348 output_data[i] = default_value;
1349 }
1350
1351 // Handle the scalar-value case specially to avoid checking the boolean
1352 // condition inside the loop every time.
1353 if (value_is_scalar)
1354 {
1355 for (int i = 0; i < value_count; ++i)
1356 {
1357 const std::vector<TI> &index = indices[i];
1358 TFLITE_DCHECK_EQ(index.size(), 4);
1359 const T value = *values; // just use the first value.
1360 output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value;
1361 }
1362 return;
1363 }
1364
1365 // Go through the values and indices to fill the sparse values.
1366 for (int i = 0; i < value_count; ++i)
1367 {
1368 const std::vector<TI> &index = indices[i];
1369 TFLITE_DCHECK_EQ(index.size(), 4);
1370 const T value = values[i];
1371 output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value;
1372 }
1373}

References Offset(), and output_shape.
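
Because the output shape is extended to rank 4, every index vector must have exactly four components. A minimal sketch filling a length-4 vector (hypothetical buffers):

const std::vector<std::vector<int32_t>> indices = {{0, 0, 0, 1}, {0, 0, 0, 3}};
const float values[] = {5.f, 7.f};
const tflite::RuntimeShape output_shape({4}); // extended internally to {1,1,1,4}
float output[4];
tflite::reference_ops::SparseToDense(indices, values, /*default_value=*/0.f,
                                     /*value_is_scalar=*/false, output_shape, output);
// output == {0, 5, 0, 7}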

◆ Split()

template<typename Scalar >
void tflite::reference_ops::Split ( const SplitParams &  params,
const RuntimeShape &  input_shape,
const Scalar *  input_data,
const RuntimeShape *const *  output_shapes,
Scalar *const *  output_data 
)

Definition at line 834 of file PALreference_ops.h.

836{
837 ruy::profiler::ScopeLabel label("Split");
838 const int split_dimensions = input_shape.DimensionsCount();
839 int axis = params.axis < 0 ? params.axis + split_dimensions : params.axis;
840 int outputs_count = params.num_split;
841 TFLITE_DCHECK_LT(axis, split_dimensions);
842
843 int64_t split_size = 0;
844 for (int i = 0; i < outputs_count; i++)
845 {
846 TFLITE_DCHECK_EQ(output_shapes[i]->DimensionsCount(), split_dimensions);
847 for (int j = 0; j < split_dimensions; j++)
848 {
849 if (j != axis)
850 {
851 MatchingDim(*output_shapes[i], j, input_shape, j);
852 }
853 }
854 split_size += output_shapes[i]->Dims(axis);
855 }
856 TFLITE_DCHECK_EQ(split_size, input_shape.Dims(axis));
857 int64_t outer_size = 1;
858 for (int i = 0; i < axis; ++i)
859 {
860 outer_size *= input_shape.Dims(i);
861 }
862 // For all output arrays,
863 // FlatSize() = outer_size * Dims(axis) * base_inner_size;
864 int64_t base_inner_size = 1;
865 for (int i = axis + 1; i < split_dimensions; ++i)
866 {
867 base_inner_size *= input_shape.Dims(i);
868 }
869
870 const Scalar *input_ptr = input_data;
871 for (int k = 0; k < outer_size; k++)
872 {
873 for (int i = 0; i < outputs_count; ++i)
874 {
875 const int copy_size = output_shapes[i]->Dims(axis) * base_inner_size;
876 memcpy(output_data[i] + k * copy_size, input_ptr, copy_size * sizeof(Scalar));
877 input_ptr += copy_size;
878 }
879 }
880}
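
A minimal sketch splitting a 2x4 tensor into two 2x2 halves along axis 1 (hypothetical buffers; assumes SplitParams exposes num_split and axis, as in upstream TensorFlow Lite):

tflite::SplitParams params{};
params.num_split = 2;
params.axis = 1;
const tflite::RuntimeShape input_shape({2, 4});
const float input[] = {1, 2, 3, 4, 5, 6, 7, 8};
const tflite::RuntimeShape out_shape({2, 2});
const tflite::RuntimeShape *output_shapes[] = {&out_shape, &out_shape};
float out0[4], out1[4];
float *output_data[] = {out0, out1};
tflite::reference_ops::Split(params, input_shape, input, output_shapes, output_data);
// out0 == {1, 2, 5, 6}, out1 == {3, 4, 7, 8}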

◆ Sub16()

void tflite::reference_ops::Sub16 ( const ArithmeticParams &  params,
const RuntimeShape &  input1_shape,
const int16_t *  input1_data,
const RuntimeShape &  input2_shape,
const int16_t *  input2_data,
const RuntimeShape &  output_shape,
int16_t *  output_data 
)
inline

Definition at line 285 of file PALreference_ops.h.

289{
290 ruy::profiler::ScopeLabel label("Sub/Int16");
291 const int input1_shift = params.input1_shift;
292 const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
293 const int16 output_activation_min = params.quantized_activation_min;
294 const int16 output_activation_max = params.quantized_activation_max;
295
296 TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
297 TFLITE_DCHECK_LE(input1_shift, 0);
298 TFLITE_DCHECK_LE(params.input2_shift, 0);
299 const int16 *not_shift_input = input1_shift == 0 ? input1_data : input2_data;
300 const int16 *shift_input = input1_shift == 0 ? input2_data : input1_data;
301 const int input_right_shift = input1_shift == 0 ? -params.input2_shift : -input1_shift;
302
303 if (input1_shift == 0)
304 {
305 // F0 uses 0 integer bits, range [-1, 1].
306 using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
307 for (int i = 0; i < flat_size; ++i)
308 {
309 F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
310 F0 scaled_input =
311 F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
312 F0 result = SaturatingSub(input_ready_scaled, scaled_input);
313 const int16 raw_output = result.raw();
314 const int16 clamped_output =
315 std::min(output_activation_max, std::max(output_activation_min, raw_output));
316 output_data[i] = clamped_output;
317 }
318 }
319 else
320 {
321 // F0 uses 0 integer bits, range [-1, 1].
322 using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
323 for (int i = 0; i < flat_size; ++i)
324 {
325 F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
326 F0 scaled_input =
327 F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
328 F0 result = SaturatingSub(scaled_input, input_ready_scaled);
329 const int16 raw_output = result.raw();
330 const int16 clamped_output =
331 std::min(output_activation_max, std::max(output_activation_min, raw_output));
332 output_data[i] = clamped_output;
333 }
334 }
335}

References output_shape.
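
Sub16 subtracts Q0.15 fixed-point tensors; one of the two input shifts must be zero and both must be non-positive. With both shifts zero it reduces to a plain saturating element-wise subtraction. A minimal sketch (hypothetical values; assumes the ArithmeticParams shift/activation fields used above):

tflite::ArithmeticParams params{};
params.input1_shift = 0;
params.input2_shift = 0;
params.quantized_activation_min = -32768;
params.quantized_activation_max = 32767;
const tflite::RuntimeShape shape({2});
const int16_t a[] = {16384, -8192}; // 0.5, -0.25 in Q0.15
const int16_t b[] = {8192, 8192};   // 0.25, 0.25
int16_t output[2];
tflite::reference_ops::Sub16(params, shape, a, shape, b, shape, output);
// output == {8192, -16384}, i.e. 0.25 and -0.5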

◆ Unpack()

template<typename Scalar >
void tflite::reference_ops::Unpack ( const UnpackParams &  params,
const RuntimeShape &  input_shape,
const Scalar *  input_data,
const RuntimeShape &  output_shape,
Scalar *const *  output_datas 
)

Definition at line 370 of file PALreference_ops.h.

372{
373 ruy::profiler::ScopeLabel label("Unpack");
374 const int dimensions = input_shape.DimensionsCount();
375 const int outputs_count = params.num_split;
376
377 int outer_size = 1;
378 int axis = params.axis;
379 if (axis < 0)
380 {
381 axis += dimensions;
382 }
383 TFLITE_DCHECK_GE(axis, 0);
384 TFLITE_DCHECK_LT(axis, dimensions);
385 for (int i = 0; i < axis; ++i)
386 {
387 outer_size *= input_shape.Dims(i);
388 }
389 int copy_size = 1;
390 for (int i = axis + 1; i < dimensions; ++i)
391 {
392 copy_size *= input_shape.Dims(i);
393 }
394 TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size);
395
396 for (int i = 0; i < outputs_count; ++i)
397 {
398 for (int k = 0; k < outer_size; k++)
399 {
400 Scalar *output_ptr = output_datas[i] + copy_size * k;
401 int loc = k * outputs_count * copy_size + i * copy_size;
402 memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
403 }
404 }
405}

References output_shape.
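
A minimal sketch unpacking a 2x2 tensor into its two rows (hypothetical buffers; assumes UnpackParams exposes num_split and axis, as in upstream TensorFlow Lite):

tflite::UnpackParams params{};
params.num_split = 2;
params.axis = 0;
const tflite::RuntimeShape input_shape({2, 2});
const float input[] = {1, 2, 3, 4};
const tflite::RuntimeShape output_shape({2});
float out0[2], out1[2];
float *output_datas[] = {out0, out1};
tflite::reference_ops::Unpack(params, input_shape, input, output_shape, output_datas);
// out0 == {1, 2}, out1 == {3, 4}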