ONE - On-device Neural Engine
Softmax.cpp — source listing from the generated documentation of this file.
/*
 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
#include "OMStatus.h"

#include "core/OMUtils.h"
#include "core/OMDataType.h"
#include "core/memory/OMMemoryManager.h"

// NOTE(review): the scraped listing dropped the original lines 20-22; the code below
// uses execute::OMRuntimeKernel and the train backprop-args type, so their headers
// are restored here — confirm exact header paths against the repository.
#include "execute/OMRuntimeKernel.h"
#include "train/OMBackpropExecutionBuilder.h"

#include "PALSoftmaxInputGrad.h"

using namespace onert_micro;
using namespace onert_micro::core;
using namespace onert_micro::train;
28
namespace
{

// Positions of the kernel's single input and single output inside the
// OMRuntimeKernel inputs[] / outputs[] arrays.
constexpr uint32_t inputTensorIdx = 0;
constexpr uint32_t outputTensorIdx = 0;

} // namespace
36
37/*
38 * - Calculate input gradient - Optional (not required if it is last op)
39 */
40OMStatus onert_micro::train::train_kernel_CircleSoftmax(const OMBackpropExecuteArgs &args)
41{
42 // Check is it last layer for training
43 if (args.is_last_layer)
44 {
45 return Ok;
46 }
47
48 core::OMRuntimeStorage &forward_storage = args.forward_storage;
49 core::OMRuntimeStorage &backward_storage = args.backward_storage;
50 core::OMRuntimeContext &context = args.backward_context;
51 uint16_t op_index = args.kernel_index;
52
53 const circle::Tensor *input;
54 const circle::Tensor *output;
55
56 uint8_t *dloss_dinput_data;
57
58 uint8_t *output_data;
59 uint8_t *dloss_doutput_data;
60
61 // Read kernel
62 {
63 execute::OMRuntimeKernel runtime_kernel;
64 runtime_kernel.readKernel(op_index, context);
65
66 input = runtime_kernel.inputs[inputTensorIdx];
67 output = runtime_kernel.outputs[outputTensorIdx];
68 assert(input != nullptr);
69 assert(output != nullptr);
70
71 // Read forward storage
72 {
73 runtime_kernel.getDataFromStorage(op_index, forward_storage, context);
74
76 assert(output_data != nullptr);
77 }
78
79 // Read backward storage
80 {
81 runtime_kernel.getDataFromStorage(op_index, backward_storage, context);
82
83 dloss_dinput_data = runtime_kernel.inputs_data[inputTensorIdx];
84 dloss_doutput_data = runtime_kernel.outputs_data[outputTensorIdx];
85
86 assert(dloss_dinput_data != nullptr);
87 assert(dloss_doutput_data != nullptr);
88 }
89 }
90
91 OMRuntimeShape input_shape(input);
93
94 // Check Softmax output and input shape
95 assert(output_shape.dimensionsCount() == 2);
96 assert(output_shape.dims(0) == 1);
97 if (output_shape.dimensionsCount() != 2 or output_shape.dims(0) != 1)
98 return UnsupportedType;
99
100 // Allocate temporary buffer to save Jacobian row
101 uint8_t *jacobian_row_data = nullptr;
103 output_shape.flatSize() * sizeof(OMDataType(output->type())), &jacobian_row_data);
104 assert(status == Ok);
105 if (status != Ok)
106 return status;
107
108 // Calculate input grad
109 pal::SoftmaxInputGrad(core::utils::castInputData<float>(dloss_doutput_data), output_shape,
110 core::utils::castInputData<float>(output_data),
111 core::utils::castOutputData<float>(jacobian_row_data),
112 core::utils::castOutputData<float>(dloss_dinput_data));
113
114#ifdef OM_MEMORY_ESTIMATE
115 // Deallocate temporary buffer with Jacobian row
117 output_shape.flatSize() * sizeof(OMDataType(output->type())), jacobian_row_data);
118#else
119 // Deallocate temporary buffer with Jacobian row
120 status = core::memory::OMMemoryManager::deallocateMemory(jacobian_row_data);
121#endif
122
123 return status;
124}
int32_t dimensionsCount() const
Definition Tensor.h:106
int32_t dims(int i) const
Definition Tensor.h:108
uint8_t * outputs_data[maxOutputSize]
OMStatus getDataFromStorage(uint16_t op_index, core::OMRuntimeStorage &storage, core::OMRuntimeContext &context)
OMStatus readKernel(uint16_t op_index, core::OMRuntimeContext &runtime_context)
const circle::Tensor * outputs[maxOutputSize]
const circle::Tensor * inputs[maxInputSize]
const luci_interpreter::RuntimeShape output_shape
constexpr uint32_t outputTensorIdx
args
Definition infer.py:21
OMDataType
"scalar" value type
Definition OMDataType.h:35
void SoftmaxInputGrad(const float *dloss_doutput_data, const core::OMRuntimeShape &dloss_doutput_shape, const float *calculated_data, float *jacobian_row_data, float *dloss_dinput_data)
@ UnsupportedType
Definition OMStatus.h:26
static OMStatus deallocateMemory(uint8_t *data)
static OMStatus allocateMemory(uint32_t size, uint8_t **data)