Class for the kernel converting boolean type. More...

#include <NECastBoolKernel.h>

Collaboration diagram for arm_compute::NECastBoolKernel:

Public Member Functions
const char *	name () const override

	NECastBoolKernel ()

	NECastBoolKernel (const NECastBoolKernel &)=delete

	NECastBoolKernel (NECastBoolKernel &&)=default

NECastBoolKernel &	operator= (const NECastBoolKernel &)=delete

NECastBoolKernel &	operator= (NECastBoolKernel &&)=default

void	configure (const ITensor *input, ITensor *output)

void	run (const Window &window, const ThreadInfo &info) override

Static Public Member Functions
static Status	validate (const ITensorInfo *input, const ITensorInfo *output)

Detailed Description

Class for the kernel converting boolean type.

Definition at line 52 of file NECastBoolKernel.h.

Constructor & Destructor Documentation

◆ NECastBoolKernel() [1/3]

NECastBoolKernel::NECastBoolKernel ( )

Default constructor

Definition at line 79 of file NECastBoolKernel.cpp.

79: _input(nullptr), _output(nullptr) {}

◆ NECastBoolKernel() [2/3]

arm_compute::NECastBoolKernel::NECastBoolKernel ( const NECastBoolKernel & )

delete

Prevent instances of this class from being copied (As this class contains pointers)

◆ NECastBoolKernel() [3/3]

arm_compute::NECastBoolKernel::NECastBoolKernel ( NECastBoolKernel && )

default

Default move constructor

Member Function Documentation

◆ configure()

void NECastBoolKernel::configure	(	const ITensor *	input,
		ITensor *	output
	)

Set the input and output of the kernel

Valid conversions Input -> Output :

U8 -> U8, S8, U16, S16, U32, S32, F32, F16

Parameters

[in]	input	The input tensor to convert. Data types supported: U8
[out]	output	The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.

Definition at line 81 of file NECastBoolKernel.cpp.

{
  // Both tensors are mandatory.
  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);

  // Only the output shape can be derived from the input; the output data type has to be
  // set by the caller beforehand, since it selects the conversion performed in run().
  set_shape_if_empty(*output->info(), input->info()->tensor_shape());

  // Remember the tensors for run().
  _input  = input;
  _output = output;

  // Validation runs after the shape auto-initialisation above, so it sees the final output info.
  ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));

  // Execution window derived from the input tensor info, using default steps.
  Window max_window = calculate_max_window(*input->info(), Steps());

  // Mark the full output shape as the valid region, anchored at default-initialised
  // coordinates with the same number of dimensions as the output.
  Coordinates anchor;
  anchor.set_num_dimensions(output->info()->num_dimensions());
  output->info()->set_valid_region(ValidRegion(anchor, output->info()->tensor_shape()));

  ICPPKernel::configure(max_window);
}

◆ name()

const char * arm_compute::NECastBoolKernel::name ( ) const

inline override

Definition at line 55 of file NECastBoolKernel.h.

55{ return "NECastBoolKernel"; }

◆ operator=() [1/2]

NECastBoolKernel & arm_compute::NECastBoolKernel::operator= ( const NECastBoolKernel & )

delete

Prevent instances of this class from being copied (As this class contains pointers)

◆ operator=() [2/2]

NECastBoolKernel & arm_compute::NECastBoolKernel::operator= ( NECastBoolKernel && )

default

Default move assignment operator

References validate().

◆ run()

void NECastBoolKernel::run	(	const Window &	window,
		const ThreadInfo &	info
	)

override

Definition at line 109 of file NECastBoolKernel.cpp.

{
  // Converts a boolean (U8) tensor to the data type of the configured output tensor.
  //
  // Each input byte is reduced to its least significant bit (value & 1), so every output
  // element is either 0 or 1 expressed in the output data type.
  //
  // window: sub-window of the configured kernel window to process.
  // info:   thread information, unused by this kernel.
  ARM_COMPUTE_UNUSED(info);
  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
  ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
  ARM_COMPUTE_ERROR_ON_NULLPTR(_input, _output);
  // In-place conversion is not supported.
  ARM_COMPUTE_ERROR_ON(_input == _output);

  const auto window_start_x = static_cast<int>(window.x().start());
  const auto window_end_x = static_cast<int>(window.x().end());
  const int window_step_x = 16; // elements consumed per vector iteration (one uint8x16_t load)

  // Collapse the X dimension: the window loop is invoked once per row and X is walked
  // manually inside each lambda (vector body followed by a scalar tail).
  Window win{window};
  win.set(Window::DimX, Window::Dimension(0, 1, 1));

  Iterator input(_input, win);
  Iterator output(_output, win);

  // Masks keeping only the least significant bit of every byte. Both the 64-bit and the
  // 128-bit variants are built once here instead of inside the inner loops.
  const uint8_t true_val = 1;
  const uint8x8_t mask_bool = vdup_n_u8(true_val);
  const uint8x16_t mask_bool_q = vdupq_n_u8(true_val);

  switch (_output->info()->data_type())
  {
    case DataType::S8:
    {
      /* Conversion U8 -> S8 */
      execute_window_loop(
        win,
        [&](const Coordinates &) {
          const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
          const auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());

          int x = window_start_x;
          for (; x <= (window_end_x - window_step_x); x += window_step_x)
          {
            const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);

            vst1q_s8(output_ptr + x, vreinterpretq_s8_u8(vandq_u8(texels_u8, mask_bool_q)));
          }

          // Compute left-over elements
          for (; x < window_end_x; ++x)
          {
            *(output_ptr + x) = static_cast<int8_t>(*(input_ptr + x) & true_val);
          }
        },
        input, output);
      break;
    }
    case DataType::S16:
    {
      /* Up-conversion U8 -> S16 */
      execute_window_loop(
        win,
        [&](const Coordinates &) {
          const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
          const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());

          int x = window_start_x;
          for (; x <= (window_end_x - window_step_x); x += window_step_x)
          {
            const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);

            const int16x8x2_t texels = {
              {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
               vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};

            vst1q_s16(output_ptr + x, texels.val[0]);
            vst1q_s16(output_ptr + x + 8, texels.val[1]);
          }

          // Compute left-over elements
          for (; x < window_end_x; ++x)
          {
            // Cast to the actual destination element type (int16_t).
            *(output_ptr + x) = static_cast<int16_t>(*(input_ptr + x) & true_val);
          }
        },
        input, output);
      break;
    }
    case DataType::S32:
    {
      /* Up-conversion U8 -> S32 */
      execute_window_loop(
        win,
        [&](const Coordinates &) {
          const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
          const auto output_ptr = reinterpret_cast<int32_t *>(output.ptr());

          int x = window_start_x;
          for (; x <= (window_end_x - window_step_x); x += window_step_x)
          {
            const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);

            const int16x8x2_t texels = {
              {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
               vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};

            vst1q_s32(output_ptr + x, vmovl_s16(vget_low_s16(texels.val[0])));
            vst1q_s32(output_ptr + x + 4, vmovl_s16(vget_high_s16(texels.val[0])));
            vst1q_s32(output_ptr + x + 8, vmovl_s16(vget_low_s16(texels.val[1])));
            vst1q_s32(output_ptr + x + 12, vmovl_s16(vget_high_s16(texels.val[1])));
          }

          // Compute left-over elements
          for (; x < window_end_x; ++x)
          {
            // Cast to the actual destination element type (int32_t).
            *(output_ptr + x) = static_cast<int32_t>(*(input_ptr + x) & true_val);
          }
        },
        input, output);
      break;
    }
    case DataType::F32:
    {
      /* Up-conversion U8 -> F32 */
      execute_window_loop(
        win,
        [&](const Coordinates &) {
          const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
          const auto output_ptr = reinterpret_cast<float *>(output.ptr());

          int x = window_start_x;
          for (; x <= (window_end_x - window_step_x); x += window_step_x)
          {
            const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);

            const int16x8x2_t texels = {
              {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
               vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
            vst1q_f32(output_ptr + x, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[0]))));
            vst1q_f32(output_ptr + x + 4, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[0]))));
            vst1q_f32(output_ptr + x + 8, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[1]))));
            vst1q_f32(output_ptr + x + 12, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[1]))));
          }

          // Compute left-over elements
          for (; x < window_end_x; ++x)
          {
            auto in = static_cast<uint32_t>(*(input_ptr + x) & true_val);
            *(output_ptr + x) = static_cast<float>(in);
          }
        },
        input, output);
      break;
    }
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    case DataType::F16:
    {
      /* Up-conversion U8 -> F16 */
      execute_window_loop(
        win,
        [&](const Coordinates &) {
          const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
          const auto output_ptr = reinterpret_cast<float16_t *>(output.ptr());

          int x = window_start_x;
          for (; x <= (window_end_x - window_step_x); x += window_step_x)
          {
            const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);

            const int16x8x2_t texels = {
              {vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool))),
               vreinterpretq_s16_u16(vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool)))}};
            vst1q_f16(output_ptr + x, vcvtq_f16_s16(texels.val[0]));
            vst1q_f16(output_ptr + x + 8, vcvtq_f16_s16(texels.val[1]));
          }

          // Compute left-over elements
          for (; x < window_end_x; ++x)
          {
            *(output_ptr + x) = static_cast<float16_t>(*(input_ptr + x) & true_val);
          }
        },
        input, output);
      break;
    }
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    case DataType::U8:
    {
      /* Conversion U8 -> U8 */
      execute_window_loop(
        win,
        [&](const Coordinates &) {
          const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
          const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());

          int x = window_start_x;
          for (; x <= (window_end_x - window_step_x); x += window_step_x)
          {
            const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);

            vst1q_u8(output_ptr + x, vandq_u8(texels_u8, mask_bool_q));
          }

          // Compute left-over elements
          for (; x < window_end_x; ++x)
          {
            *(output_ptr + x) = static_cast<uint8_t>(*(input_ptr + x) & true_val);
          }
        },
        input, output);
      break;
    }
    case DataType::U16:
    {
      /* Up-conversion U8 -> U16 */
      execute_window_loop(
        win,
        [&](const Coordinates &) {
          const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
          const auto output_ptr = reinterpret_cast<uint16_t *>(output.ptr());

          int x = window_start_x;
          for (; x <= (window_end_x - window_step_x); x += window_step_x)
          {
            const uint8x16_t texels_u8 = vld1q_u8(input_ptr + x);

            const uint16x8x2_t texels = {{vmovl_u8(vand_u8(vget_low_u8(texels_u8), mask_bool)),
                                          vmovl_u8(vand_u8(vget_high_u8(texels_u8), mask_bool))}};

            vst1q_u16(output_ptr + x, texels.val[0]);
            vst1q_u16(output_ptr + x + 8, texels.val[1]);
          }

          // Compute left-over elements
          for (; x < window_end_x; ++x)
          {
            *(output_ptr + x) = static_cast<uint16_t>(*(input_ptr + x) & true_val);
          }
        },
        input, output);
      break;
    }
    default:
      ARM_COMPUTE_ERROR("Output data type not supported");
  }
}

References info.

◆ validate()

Status NECastBoolKernel::validate	(	const ITensorInfo *	input,
		const ITensorInfo *	output
	)

static

Static function to check if given info will lead to a valid configuration of NECastBoolKernel