125 const T *input_data,
const Shape &filter_shape,
const T *filter_data,
127 T *output_data, ruy::Context *ruy_context)
137 const auto max_threads = (ruy_context ==
nullptr) ? 1 : ruy_context->max_num_threads();
139 thread_count = std::max(1, std::min(thread_count, max_threads));
142 if constexpr (std::is_floating_point<T>::value)
144 thread_count = std::min(thread_count, 2);
150 if (thread_count == 1)
153 bias_shape, bias_data,
output_shape, output_data, 0, output_height,
158 int thread_dim, thread_dim_size;
162 thread_dim_size = output_batches;
167 thread_dim_size = output_height;
170 std::vector<DepthwiseConvWorkerTask<T, TS>> tasks;
173 tasks.reserve(thread_count);
174 int thread_start = 0;
175 for (
int i = 0; i < thread_count; ++i)
177 int thread_end = thread_start + (thread_dim_size - thread_start) / (thread_count - i);
178 tasks.emplace_back(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
179 bias_data,
output_shape, output_data, thread_start, thread_end, thread_dim);
180 thread_start = thread_end;
186 const float *input_data,
const Shape &filter_shape,
const float *filter_data,
187 const Shape &bias_shape,
const float *bias_data,
float *padded_filter_data,
192 throw std::runtime_error(
"Not support different length strides");
195 throw std::runtime_error{
"Not support dilation other than 1."};
198 const int input_depth = input_shape.
Dims(3);
200 const int input_height = input_shape.
Dims(1);
201 const int input_width = input_shape.
Dims(2);
202 const int filter_height = filter_shape.
Dims(1);
203 const int filter_width = filter_shape.
Dims(2);
214 batch, input_height, input_width, input_depth, filter_height, filter_width, depth_multiplier,
215 stride, pad_height, pad_width, output_height, output_width, output_depth, input_data,
216 filter_data, padded_filter_data, pad_filter, filter_buffers_data, output_data);
218 if (bias_data !=
nullptr)
220 bias_op::biasHelper<float>(bias_shape, bias_data,
output_shape, output_data, activation_min,
void DepthwiseConvImpl(const DepthwiseConvParams &params, const Shape &input_shape, const float *input_data, const Shape &filter_shape, const float *filter_data, const Shape &bias_shape, const float *bias_data, const Shape &output_shape, float *output_data, int thread_start, int thread_end, int thread_dim)
void DepthwiseConv(const DepthwiseConvParams &params, const Shape &input_shape, const T *input_data, const Shape &filter_shape, const T *filter_data, const Shape &bias_shape, const TS *bias_data, const Shape &output_shape, T *output_data, ruy::Context *ruy_context)
void DepthwiseConvOp(const DepthwiseConvParams &params, const Shape &input_shape, const float *input_data, const Shape &filter_shape, const float *filter_data, const Shape &bias_shape, const float *bias_data, float *padded_filter_data, bool pad_filter, float *filter_buffers_data, const Shape &output_shape, float *output_data)
DepthwiseConvWorkerTask(const DepthwiseConvParams &params, const Shape &input_shape, const T *input_data, const Shape &filter_shape, const T *filter_data, const Shape &bias_shape, const TS *bias_data, const Shape &output_shape, T *output_data, int thread_start, int thread_end, int thread_dim)