ONE - On-device Neural Engine
Loading...
Searching...
No Matches
PALDepthwiseConv2D.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef ONERT_MICRO_EXECUTE_PAL_DEPTHWISE_CONV_2D_H
19#define ONERT_MICRO_EXECUTE_PAL_DEPTHWISE_CONV_2D_H
20
#include "PALDepthwiseConv2DCommon.h"
#include "core/OMKernelData.h"
#include "core/OMRuntimeShape.h"
#include "PALUtils.h"

#include <arm_nnfunctions.h>

#include <cassert>
#include <cstdint>
#include <memory>
27
28namespace onert_micro
29{
30namespace execute
31{
32namespace pal
33{
34
35// Fixed-point per-channel-quantization deptwise_conv2D reference kernel.
37 const core::OMRuntimeShape &input_shape, const int8_t *input_data,
38 const core::OMRuntimeShape &filter_shape,
39 const int8_t *filter_data, const int32_t *bias_data,
40 const core::OMRuntimeShape &output_shape, int8_t *output_data)
41{
42 cmsis_nn_dw_conv_params conv_params;
43 conv_params.dilation.h = params.dilation_height_factor;
44 conv_params.dilation.w = params.dilation_width_factor;
45
46 assert(conv_params.dilation.h == 1);
47 assert(conv_params.dilation.w == 1);
48
49 conv_params.input_offset = params.input_offset;
50 conv_params.output_offset = params.output_offset;
51 conv_params.stride.h = params.stride_height;
52 conv_params.stride.w = params.stride_width;
53 conv_params.padding.h = params.pad_h;
54 conv_params.padding.w = params.pad_w;
55 conv_params.activation.min = params.quantized_activation_min;
56 conv_params.activation.max = params.quantized_activation_max;
57 conv_params.ch_mult = params.depth_multiplier;
58
59 cmsis_nn_per_channel_quant_params quant_params;
60 quant_params.multiplier = const_cast<int32_t *>(params.per_channel_output_multiplier.data());
61 quant_params.shift = const_cast<int32_t *>(
62 reinterpret_cast<const int32_t *>(params.per_channel_output_shift.data()));
63
64 assert(conv_params.activation.min <= conv_params.activation.max);
65 const int batch_size = input_shape.dims(0);
66 const int input_depth = input_shape.dims(3);
67 const int output_depth = filter_shape.dims(0);
68
69 cmsis_nn_dims input_dims;
70 input_dims.n = batch_size;
71 input_dims.h = input_shape.dims(1);
72 input_dims.w = input_shape.dims(2);
73 input_dims.c = input_depth;
74
75 cmsis_nn_dims filter_dims;
76 filter_dims.n = output_depth;
77 filter_dims.h = filter_shape.dims(1);
78 filter_dims.w = filter_shape.dims(2);
79 filter_dims.c = input_depth;
80
81 cmsis_nn_dims bias_dims;
82 bias_dims.n = 1;
83 bias_dims.h = 1;
84 bias_dims.w = 1;
85 bias_dims.c = output_depth;
86
87 cmsis_nn_dims output_dims;
88 output_dims.n = batch_size;
89 output_dims.h = output_shape.dims(1);
90 output_dims.w = output_shape.dims(2);
91 output_dims.c = output_depth;
92
93 auto buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size(&conv_params, &input_dims,
94 &filter_dims, &output_dims);
95
96 auto buffer = std::make_unique<int8_t[]>(buf_size);
97 assert(buffer != nullptr);
98
99 cmsis_nn_context ctx;
100 ctx.buf = buffer.get();
101 ctx.size = buf_size;
102
103 auto res = arm_depthwise_conv_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims,
104 input_data, &filter_dims, filter_data, &bias_dims,
105 bias_data, &output_dims, output_data);
106
107 assert(res == ARM_CMSIS_NN_SUCCESS);
108 if (res != ARM_CMSIS_NN_SUCCESS)
109 return CmsisNNError;
110 return Ok;
111}
112
113} // namespace pal
114} // namespace execute
115} // namespace onert_micro
116
117#endif // ONERT_MICRO_EXECUTE_PAL_DEPTHWISE_CONV_2D_H
int32_t dims(int i) const
Definition Tensor.h:108
const luci_interpreter::RuntimeShape output_shape
OMStatus DepthwiseConvPerChannel(const core::ConvQuant &params, const core::OMRuntimeShape &input_shape, const int8_t *input_data, const core::OMRuntimeShape &filter_shape, const int8_t *filter_data, const int32_t *bias_data, const core::OMRuntimeShape &output_shape, int8_t *output_data)
std::vector< int > per_channel_output_shift
std::vector< int32_t > per_channel_output_multiplier