ONE - On-device Neural Engine
arm_compute::CLReduceOperation Class Reference

Class to perform ReduceOperation.

#include <CLReduceOperation.h>

Collaboration diagram for arm_compute::CLReduceOperation:

Public Member Functions

 CLReduceOperation (std::shared_ptr< IMemoryManager > memory_manager)
 Construct a new ReduceOperation object.
 
void configure (ICLTensor *input, ICLTensor *output, const std::set< uint32_t > &axis, bool keep_dims, ReductionOperation op)
 Set the input and output tensors.
 
void run () override
 Run the OpenCL kernel for this operation.
 

Static Public Member Functions

static Status validate (const ITensorInfo *input, const ITensorInfo *output, const std::set< uint32_t > &axis, bool keep_dims, const ReductionOperation &op)
 Static function to check if given info will lead to a valid configuration of CLReduceOperation.
 

Detailed Description

Class to perform ReduceOperation.

Definition at line 66 of file CLReduceOperation.h.

Constructor & Destructor Documentation

◆ CLReduceOperation()

CLReduceOperation::CLReduceOperation(std::shared_ptr<IMemoryManager> memory_manager)

Construct a new ReduceOperation object.

Definition at line 48 of file CLReduceOperation.cpp.

  : _memory_group(std::move(memory_manager)), _input(nullptr), _output(nullptr), _axis(),
    _keep_dims(false), _interm_tensors(), _reduce_kernels(), _reshape()
{
}
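
A construction sketch (not taken from the ONE sources): only the std::shared_ptr<IMemoryManager> argument matters here, so passing nullptr and sharing an arm_compute::MemoryManagerOnDemand are both shown as assumed-valid options. The helper function name and the include path for CLReduceOperation.h are illustrative and depend on how ARMComputeEx is laid out in your build.

#include <arm_compute/runtime/BlobLifetimeManager.h>
#include <arm_compute/runtime/MemoryManagerOnDemand.h>
#include <arm_compute/runtime/PoolManager.h>
#include <memory>

#include "CLReduceOperation.h" // illustrative include path

void construct_examples()
{
  using namespace arm_compute;

  // Assumed option 1: no memory manager; intermediate tensors are allocated directly in configure().
  CLReduceOperation reduce_plain{nullptr};

  // Assumed option 2: share a MemoryManagerOnDemand so intermediate buffers can be pooled and reused.
  auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
  auto pool_mgr = std::make_shared<PoolManager>();
  auto mm = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);
  CLReduceOperation reduce_pooled{mm};
  // Note: a pooled setup would still need the manager populated with an allocator
  // (e.g. via MemoryManagerOnDemand::populate()) before running; that step is omitted here.
}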

Member Function Documentation

◆ configure()

void CLReduceOperation::configure(ICLTensor *input,
                                  ICLTensor *output,
                                  const std::set<uint32_t> &axis,
                                  bool keep_dims,
                                  ReductionOperation op)

Set the input and output tensors.

Parameters
[in]  input      Source tensor. Data types supported: U8/S32/F32.
[out] output     Destination tensor. Data types and data layouts supported: same as input.
[in]  axis       Axes along which to reduce. Must be sorted and contain no duplicates.
[in]  keep_dims  If true, retains reduced dimensions with length 1.
[in]  op         Reduce operation to perform.
Returns
N/A

Definition at line 106 of file CLReduceOperation.cpp.

{
  ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), axis, keep_dims, op));

  _axis = axis;

  _input = input;
  _output = output;
  _keep_dims = keep_dims;

  // NOTE The axis must have no duplication.
  const size_t num_of_kernels = axis.size();
  const size_t num_of_interm_tensors = num_of_kernels - (keep_dims ? 1 : 0);

  if (num_of_kernels < 1)
  {
    throw std::runtime_error("CLReduceOperation: there is no axis to reduce");
  }

  _interm_tensors = std::make_unique<CLTensor[]>(num_of_interm_tensors);
  _reduce_kernels = std::make_unique<CLReduceOperationKernel[]>(num_of_kernels);

  // Set a vector that is ordered ICLTensors sequentially.
  std::vector<ICLTensor *> tensors;
  tensors.emplace_back(input);
  for (size_t i = 0; i < num_of_interm_tensors; ++i)
  {
    tensors.emplace_back(_interm_tensors.get() + i);
  }
  tensors.emplace_back(output);

  // Apply ReductionOperation on all kernels
  TensorShape shape{input->info()->tensor_shape()};
  auto it = axis.begin();
  for (size_t i = 0; i < num_of_kernels; ++i, ++it)
  {
    shape.set(*it, 1, false);
    if (!keep_dims || i != (num_of_kernels - 1))
    {
      _interm_tensors[i].allocator()->init(input->info()->clone()->set_tensor_shape(shape));
      _memory_group.manage(&_interm_tensors[i]);
    }
    _reduce_kernels[i].configure(tensors[i], tensors[i + 1], *it, op);
    if (i != 0)
    {
      _interm_tensors[i - 1].allocator()->allocate();
    }
  }

  // Configure reshape layer if we want to drop the dimensions
  if (!keep_dims)
  {
    _reshape.configure(&_interm_tensors[num_of_interm_tensors - 1], output);
    _interm_tensors[num_of_interm_tensors - 1].allocator()->allocate();
  }
}

References validate().
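
A usage sketch for configure(), assuming the OpenCL context has already been set up with CLScheduler::get().default_init(). The helper function name, include paths, tensor shapes, data type, axes, and the ReductionOperation::SUM value are all illustrative assumptions, not requirements stated by this class.

#include <arm_compute/core/TensorInfo.h>
#include <arm_compute/core/TensorShape.h>
#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/CL/CLScheduler.h>
#include <arm_compute/runtime/CL/CLTensor.h>
#include <cstdint>
#include <set>

#include "CLReduceOperation.h" // illustrative include path

void configure_example()
{
  using namespace arm_compute;

  CLScheduler::get().default_init(); // one-time OpenCL context/queue setup (assumed not done elsewhere)

  // Hypothetical 2x3x4x8 F32 input reduced over axes {1, 2}; with keep_dims the output is 2x1x1x8.
  CLTensor input;
  CLTensor output;
  input.allocator()->init(TensorInfo(TensorShape(2U, 3U, 4U, 8U), 1, DataType::F32));
  output.allocator()->init(TensorInfo(TensorShape(2U, 1U, 1U, 8U), 1, DataType::F32));

  CLReduceOperation reduce{nullptr}; // no memory manager, for brevity (assumption)

  const std::set<uint32_t> axis{1, 2}; // sorted, no duplicates
  reduce.configure(&input, &output, axis, /*keep_dims=*/true, ReductionOperation::SUM);

  // Backing CL buffers are allocated after configuration, as in other arm_compute functions.
  input.allocator()->allocate();
  output.allocator()->allocate();
}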

◆ run()

void CLReduceOperation::run() override

Run the OpenCL kernel for this operation.

Returns
N/A

Definition at line 165 of file CLReduceOperation.cpp.

{
  MemoryGroupResourceScope scope_mg(_memory_group);

  const size_t num_of_kernels = _axis.size();
  for (size_t i = 0; i < num_of_kernels; ++i)
  {
    CLScheduler::get().enqueue(_reduce_kernels[i]);
  }

  if (!_keep_dims)
  {
    _reshape.run();
  }
}

Referenced by package.infer.session::inference().
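
A sketch of executing the configured function and reading the result back on the host. It assumes reduce and output were prepared as in the configure() sketch above; CLScheduler::get().sync() and CLTensor::map()/unmap()/buffer() are standard arm_compute calls, while the helper function name and include path are illustrative.

#include <arm_compute/runtime/CL/CLScheduler.h>
#include <arm_compute/runtime/CL/CLTensor.h>

#include "CLReduceOperation.h" // illustrative include path

void run_example(arm_compute::CLReduceOperation &reduce, arm_compute::CLTensor &output)
{
  using namespace arm_compute;

  reduce.run();              // enqueue one CLReduceOperationKernel per axis (plus a reshape when keep_dims is false)
  CLScheduler::get().sync(); // block until the OpenCL command queue has finished

  output.map();              // make the device buffer visible on the host
  const auto *result = reinterpret_cast<const float *>(output.buffer());
  (void)result;              // read the reduced values here
  output.unmap();
}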

◆ validate()

Status CLReduceOperation::validate(const ITensorInfo *input,
                                   const ITensorInfo *output,
                                   const std::set<uint32_t> &axis,
                                   bool keep_dims,
                                   const ReductionOperation &op)    [static]

Static function to check if given info will lead to a valid configuration of CLReduceOperation.

Parameters
[in]  input      Source tensor info. Data types supported: U8/S32/F32.
[in]  output     Destination tensor info. Data types and data layouts supported: same as input.
[in]  axis       Axes along which to reduce. Must be sorted and contain no duplicates.
[in]  keep_dims  If true, retains reduced dimensions with length 1.
[in]  op         Reduce operation to perform.
Returns
a status

Definition at line 54 of file CLReduceOperation.cpp.

{
  const size_t num_of_kernels = axis.size();
  const size_t num_of_interm_tensors = num_of_kernels - (keep_dims ? 1 : 0);

  ARM_COMPUTE_RETURN_ERROR_ON(num_of_kernels < 1);

  // Create temporary tensor infos
  auto interm_tensors = std::make_unique<TensorInfo[]>(num_of_interm_tensors);

  // Create intermediate tensor info
  TensorShape shape{input->tensor_shape()};

  auto it = axis.begin();
  for (size_t i = 0; i < num_of_interm_tensors; ++i, ++it)
  {
    shape.set(*it, 1, false);
    interm_tensors[i].set_data_type(input->data_type());
    interm_tensors[i].set_tensor_shape(shape);
    interm_tensors[i].set_num_channels(input->num_channels());
    interm_tensors[i].set_data_layout(input->data_layout());
    interm_tensors[i].set_quantization_info(input->quantization_info());
  }

  // Set a vector that is ordered ITensorInfo sequentially.
  std::vector<const ITensorInfo *> tensors;
  tensors.emplace_back(input);
  for (size_t i = 0; i < num_of_interm_tensors; ++i)
  {
    tensors.emplace_back(interm_tensors.get() + i);
  }
  tensors.emplace_back(output);

  // Validate ReduceOperation only on all kernels
  it = axis.begin();
  for (size_t i = 0; i < num_of_kernels; ++i, ++it)
  {
    ARM_COMPUTE_RETURN_ON_ERROR(
      CLReduceOperationKernel::validate(tensors[i], tensors[i + 1], *it, op));
  }

  if (!keep_dims)
  {
    ARM_COMPUTE_RETURN_ON_ERROR(
      CLReshapeLayer::validate(&interm_tensors[num_of_interm_tensors - 1], output));
  }

  return Status{};
}

References arm_compute::CLReduceOperationKernel::validate().

Referenced by configure().
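
A pre-flight sketch using validate(): it only takes ITensorInfo, so it can run before any OpenCL memory exists. The helper function name, include paths, shapes, and the ReductionOperation::MAX value are illustrative assumptions; check CLReduceOperationKernel for the operations it actually accepts.

#include <arm_compute/core/Error.h>
#include <arm_compute/core/TensorInfo.h>
#include <arm_compute/core/TensorShape.h>
#include <arm_compute/core/Types.h>
#include <cstdint>
#include <iostream>
#include <set>

#include "CLReduceOperation.h" // illustrative include path

bool can_reduce()
{
  using namespace arm_compute;

  // Reduce a hypothetical 3x4x8 F32 tensor over axis 0, dropping the reduced dimension.
  const TensorInfo input(TensorShape(3U, 4U, 8U), 1, DataType::F32);
  const TensorInfo output(TensorShape(4U, 8U), 1, DataType::F32);
  const std::set<uint32_t> axis{0};

  const Status status =
    CLReduceOperation::validate(&input, &output, axis, /*keep_dims=*/false, ReductionOperation::MAX);
  if (!status)
  {
    std::cerr << "CLReduceOperation::validate failed: " << status.error_description() << std::endl;
    return false;
  }
  return true;
}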


The documentation for this class was generated from the following files:

CLReduceOperation.h
CLReduceOperation.cpp