ONE - On-device Neural Engine
Loading...
Searching...
No Matches
GRU.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "OMStatus.h"
18
19#include "core/OMUtils.h"
20#include "core/OMDataType.h"
22
25
26#include "PALGRUWeightGrad.h"
27
28using namespace onert_micro;
29using namespace onert_micro::train;
30
namespace
{

// Operand slots of the circle GRU kernel, as read via
// runtime_kernel.inputs[...] in this translation unit.
constexpr uint32_t inputTensorIdx = 0;            // layer input
constexpr uint32_t hiddenHiddenTensorIdx = 1;     // hidden-to-hidden weights
constexpr uint32_t hiddenHiddenBiasTensorIdx = 2; // hidden-to-hidden bias (may be absent)
constexpr uint32_t hiddenInputTensorIdx = 3;      // input-to-hidden weights
constexpr uint32_t hiddenInputBiasTensorIdx = 4;  // input-to-hidden bias (may be absent)
constexpr uint32_t stateTensorIdx = 5;            // recurrent state / saved intermediate buffer

// The kernel's single output slot.
constexpr uint32_t outputTensorIdx = 0;

} // namespace
44
45OMStatus onert_micro::train::train_kernel_CircleGRU(const OMBackpropExecuteArgs &args)
46{
47 // Check is it last layer for training
48 core::OMRuntimeContext &runtime_context = args.backward_context;
49 core::OMRuntimeStorage &backward_storage = args.backward_storage;
50 core::OMRuntimeStorage &forward_storage = args.forward_storage;
51 uint16_t op_index = args.kernel_index;
52
53 execute::OMRuntimeKernel runtime_kernel;
54 runtime_kernel.readKernel(op_index, runtime_context);
55
56 const circle::Tensor *input = runtime_kernel.inputs[inputTensorIdx];
57 const circle::Tensor *weight_input = runtime_kernel.inputs[hiddenInputTensorIdx];
58 const circle::Tensor *weight_hidden = runtime_kernel.inputs[hiddenHiddenTensorIdx];
59 const circle::Tensor *output = runtime_kernel.outputs[outputTensorIdx];
60
61 assert(input != nullptr);
62 assert(output != nullptr);
63
64 OMStatus status = Ok;
65
66 // Read forward
67 status = runtime_kernel.getDataFromStorage(op_index, forward_storage, runtime_context);
68 if (status != Ok)
69 return status;
70 uint8_t *input_data = runtime_kernel.inputs_data[inputTensorIdx];
71 uint8_t *weight_input_data = runtime_kernel.inputs_data[hiddenInputTensorIdx];
72 uint8_t *weight_hidden_data = runtime_kernel.inputs_data[hiddenHiddenTensorIdx];
73 uint8_t *bias_input_data = runtime_kernel.inputs_data[hiddenInputBiasTensorIdx];
74 uint8_t *bias_hidden_data = runtime_kernel.inputs_data[hiddenHiddenBiasTensorIdx];
75 uint8_t *intermediate_buffer = runtime_kernel.inputs_data[stateTensorIdx];
76 // Bias_data can be nullptr
77 assert(input_data != nullptr);
78 assert(weight_input_data != nullptr);
79 assert(weight_hidden_data != nullptr);
80 assert(intermediate_buffer != nullptr);
81
82 // Read backward
83 status = runtime_kernel.getDataFromStorage(op_index, backward_storage, runtime_context);
84 uint8_t *output_grad_data = runtime_kernel.outputs_data[outputTensorIdx];
85 uint8_t *weight_input_grad_data = runtime_kernel.inputs_data[hiddenInputTensorIdx];
86 uint8_t *weight_hidden_grad_data = runtime_kernel.inputs_data[hiddenHiddenTensorIdx];
87 uint8_t *bias_input_grad_data = runtime_kernel.inputs_data[hiddenInputBiasTensorIdx];
88 uint8_t *bias_hidden_grad_data = runtime_kernel.inputs_data[hiddenHiddenBiasTensorIdx];
89 uint8_t *state_grad_data = runtime_kernel.inputs_data[stateTensorIdx];
90 uint8_t *input_grad_data = runtime_kernel.inputs_data[inputTensorIdx];
91 // Bias_data and input_grad_data can be nullptr
92 // Note: input_grad_data can be nullptr due to it can be last trainable node
93 assert(output_grad_data != nullptr);
94 assert(weight_input_grad_data != nullptr);
95 assert(weight_hidden_grad_data != nullptr);
96 assert(state_grad_data != nullptr);
97
98 // Obtain shapes
99 core::OMRuntimeShape input_shape(input);
101 core::OMRuntimeShape weight_input_shape(weight_input);
102 core::OMRuntimeShape weight_hidden_shape(weight_hidden);
103
104 // Init output shape for FullyConnected layers
105 core::OMRuntimeShape output_shape_fc(2);
106 output_shape_fc.setDim(0, 1);
107 output_shape_fc.setDim(1, weight_hidden_shape.dims(0));
108
109 // Allocate memory for outputs temporary gradients for FullyConnected layers
110 uint8_t *left_fc_output_grad_buffer;
111 uint8_t *right_fc_output_grad_buffer;
112 // Checking during import
113 assert(weight_hidden_shape.dims(0) == weight_input_shape.dims(0));
114 size_t allocation_size = sizeof(core::OMDataType(input->type())) * weight_hidden_shape.dims(0);
115 status =
116 core::memory::OMMemoryManager::allocateMemory(allocation_size, &left_fc_output_grad_buffer);
117 if (status != Ok)
118 return status;
119 status =
120 core::memory::OMMemoryManager::allocateMemory(allocation_size, &right_fc_output_grad_buffer);
121 if (status != Ok)
122 return status;
123
124 assert(left_fc_output_grad_buffer != nullptr and right_fc_output_grad_buffer != nullptr);
125
126 // Currently support only float training
127 if (input->type() != circle::TensorType_FLOAT32)
128 return UnsupportedType;
129
130 status =
131 pal::GRUWeightGrads(core::utils::castInputData<float>(output_grad_data),
132 core::utils::castInputData<float>(weight_input_data),
133 core::utils::castOutputData<float>(weight_input_grad_data),
134 core::utils::castInputData<float>(weight_hidden_data),
135 core::utils::castOutputData<float>(weight_hidden_grad_data),
136 core::utils::castInputData<float>(bias_input_data),
137 core::utils::castOutputData<float>(bias_input_grad_data),
138 core::utils::castInputData<float>(bias_hidden_data),
139 core::utils::castOutputData<float>(bias_hidden_grad_data),
140 core::utils::castInputData<float>(input_data),
141 core::utils::castOutputData<float>(input_grad_data),
142 core::utils::castOutputData<float>(state_grad_data), input_shape,
143 output_shape, weight_input_shape, weight_hidden_shape, output_shape_fc,
144 core::utils::castOutputData<float>(intermediate_buffer),
145 core::utils::castOutputData<float>(left_fc_output_grad_buffer),
146 core::utils::castOutputData<float>(right_fc_output_grad_buffer));
147
148 // TODO: add input grads calculation
149
150 // Deallocate
152 core::memory::OMMemoryManager::deallocateMemory(left_fc_output_grad_buffer);
153 core::memory::OMMemoryManager::deallocateMemory(right_fc_output_grad_buffer);
154
155 forward_storage.removeTensorFromTensorIndexToData(runtime_kernel.inputs_index[stateTensorIdx]);
156
157 return status;
158}
OMStatus removeTensorFromTensorIndexToData(uint16_t tensor_index)
uint8_t * outputs_data[maxOutputSize]
OMStatus getDataFromStorage(uint16_t op_index, core::OMRuntimeStorage &storage, core::OMRuntimeContext &context)
OMStatus readKernel(uint16_t op_index, core::OMRuntimeContext &runtime_context)
const circle::Tensor * outputs[maxOutputSize]
const circle::Tensor * inputs[maxInputSize]
const luci_interpreter::RuntimeShape output_shape
constexpr uint32_t outputTensorIdx
args
Definition infer.py:21
list input_data
Definition infer.py:29
OMDataType
"scalar" value type
Definition OMDataType.h:35
OMStatus GRUWeightGrads(const float *output_grad_data, const float *weight_input_data, float *weight_input_grad_data, const float *weight_hidden_data, float *weight_hidden_grad_data, const float *bias_input_data, float *bias_input_grad_data, const float *bias_hidden_data, float *bias_hidden_grad_data, const float *input_data, float *input_grad_data, float *state_grad_data, const core::OMRuntimeShape &input_shape, const core::OMRuntimeShape &output_shape, const core::OMRuntimeShape &weight_input_shape, const core::OMRuntimeShape &weight_hidden_shape, const core::OMRuntimeShape &output_shape_fc, float *intermediate_buffer, float *left_fc_output_grad_buffer, float *right_fc_output_grad_buffer)
@ UnsupportedType
Definition OMStatus.h:26
static OMStatus deallocateMemory(uint8_t *data)
static OMStatus allocateMemory(uint32_t size, uint8_t **data)