18#ifndef __NNFW_RUY_NEON_TENSOR_UTILS_H__
19#define __NNFW_RUY_NEON_TENSOR_UTILS_H__
// Number of float elements handled per NEON vector step: the vectorized loop
// below advances by this amount while loading float32x4_t values.
// Must stay a power of two, because the postamble start is computed by masking
// the size with (kFloatWeightsPerNeonLane - 1).
25#define kFloatWeightsPerNeonLane 4
32inline bool NeonIsZeroVector(
const float *vector,
int v_size)
37 const int postamble_start = v_size - (v_size & (kFloatWeightsPerNeonLane - 1));
39 const float32x4_t zero_x4_float = vmovq_n_f32(0.0f);
40 for (
int v = 0; v < postamble_start; v += kFloatWeightsPerNeonLane)
42 const float32x4_t i_x4_float = vld1q_f32(vector + v);
43 uint32x4_t cmp_result = vceqq_f32(i_x4_float, zero_x4_float);
44 if (vgetq_lane_u32(cmp_result, 0) == 0)
46 if (vgetq_lane_u32(cmp_result, 1) == 0)
48 if (vgetq_lane_u32(cmp_result, 2) == 0)
50 if (vgetq_lane_u32(cmp_result, 3) == 0)
55 for (
int v = postamble_start; v < v_size; ++v)