ONE - On-device Neural Engine
Loading...
Searching...
No Matches
Concat.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
#include "Concat.h"
#include "Common.h"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <functional>
#include <vector>
23
24namespace mir_interpreter
25{
26
// Generic concatenation along `axis` for any element type T. The
// implementation copies raw bytes with memcpy, so T only determines the
// element size via the tensor's type information.
template <typename T> struct ConcatImpl
{
  static void run(const std::vector<std::reference_wrapper<const mir::TensorVariant>> &inputs,
                  int axis, mir::TensorVariant &output);
};
32
33template <typename T>
34void ConcatImpl<T>::run(const std::vector<std::reference_wrapper<const mir::TensorVariant>> &inputs,
35 int axis, mir::TensorVariant &output)
36{
37 const auto &output_shape = output.getShape();
38 const size_t inputs_count = inputs.size();
39 const int32_t concat_dims = output_shape.rank();
40 int64_t concat_size = 0;
41 for (size_t i = 0; i < inputs_count; i++)
42 {
43 const auto &input_shape = inputs[i].get().getShape();
44 assert(input_shape.rank() == concat_dims);
45 for (int32_t j = 0; j < concat_dims; j++)
46 {
47 if (j != axis)
48 {
49 assert(input_shape.dim(j) == output_shape.dim(j));
50 }
51 }
52 concat_size += input_shape.dim(axis);
53 }
54 assert(concat_size == output_shape.dim(axis));
55 // Outer size before axis
56 int32_t outer_size = 1;
57 for (int32_t i = 0; i < axis; i++)
58 outer_size *= output_shape.dim(i);
59 // Inner size after axis
60 int32_t base_inner_size = 1;
61 for (int32_t i = axis + 1; i < concat_dims; i++)
62 base_inner_size *= output_shape.dim(i);
63 // flatten = outer_size * dim(axis) * base_inner_size;
64 std::vector<int32_t> copy_sizes;
65 std::vector<char *> input_ptrs;
66 for (size_t i = 0; i < inputs_count; i++)
67 {
68 const auto input_shape = inputs[i].get().getShape();
69 copy_sizes.push_back(input_shape.dim(axis) * base_inner_size);
70 input_ptrs.push_back(inputs[i].get().atOffset(0));
71 }
72
73 char *output_ptr = output.atOffset(0);
74 const size_t elem_size = inputs[0].get().getElementSize();
75 for (int32_t i = 0; i < outer_size; i++)
76 {
77 for (size_t j = 0; j < inputs_count; j++)
78 {
79 std::memcpy(output_ptr, input_ptrs[j], copy_sizes[j] * elem_size);
80 output_ptr += copy_sizes[j] * elem_size;
81 input_ptrs[j] += copy_sizes[j] * elem_size;
82 }
83 }
84}
85
// Specialization for quantized uint8 tensors: unlike the generic version it
// must requantize elements of any input whose (scale, zero point) parameters
// differ from the output's.
template <> struct ConcatImpl<uint8_t>
{
  static void run(const std::vector<std::reference_wrapper<const mir::TensorVariant>> &inputs,
                  int axis, mir::TensorVariant &output);
};
91
93 const std::vector<std::reference_wrapper<const mir::TensorVariant>> &inputs, int axis,
94 mir::TensorVariant &output)
95{
96 const size_t inputs_count = inputs.size();
97 std::vector<int32_t> input_zeropoints(inputs_count);
98 std::vector<float> input_scales(inputs_count);
99 const auto &output_shape = output.getShape();
100 const int32_t concat_dimensions = output_shape.rank();
101 int64_t concat_size = 0;
102 for (size_t i = 0; i < inputs_count; i++)
103 {
104 const auto &input_type = inputs[i].get().getType();
105 assert(input_type.isQuantized());
106 assert(input_type.getElementType() == mir::DataType::UINT8);
107 const auto &input_shape = input_type.getShape();
108 assert(input_shape.rank() == concat_dimensions);
109
110 for (int32_t j = 0; j < concat_dimensions; j++)
111 if (j != axis)
112 assert(input_shape.dim(j) == output_shape.dim(j));
113
114 concat_size += input_shape.dim(axis);
115 input_zeropoints[i] = input_type.getQuantization().getZeroPoint();
116 input_scales[i] = input_type.getQuantization().getScale();
117 }
118 assert(concat_size == output_shape.dim(axis));
119
120 const auto &output_type = output.getType();
121 assert(output_type.isQuantized());
122 int32_t output_zeropoint = output_type.getQuantization().getZeroPoint();
123 float output_scale = output_type.getQuantization().getScale();
124
125 // Outer size before axis
126 int32_t outer_size = 1;
127 for (int32_t i = 0; i < axis; i++)
128 outer_size *= output_shape.dim(i);
129 // Inner size after axis
130 int32_t base_inner_size = 1;
131 for (int32_t i = axis + 1; i < concat_dimensions; i++)
132 base_inner_size *= output_shape.dim(i);
133 // flatten = outer_size * dim(axis) * base_inner_size;
134
135 uint8_t *output_ptr = reinterpret_cast<uint8_t *>(output.atOffset(0));
136
137 const float inverse_output_scale = 1.f / output_scale;
138 for (int k = 0; k < outer_size; k++)
139 {
140 for (size_t i = 0; i < inputs_count; ++i)
141 {
142 const mir::TensorVariant &input = inputs[i];
143 const int copy_size = input.getShape().dim(axis) * base_inner_size;
144 const char *input_data = input.atOffset(0) + k * copy_size;
145 const uint8_t *input_ptr = reinterpret_cast<const uint8_t *>(input_data);
146 if (input_zeropoints[i] == output_zeropoint && input_scales[i] == output_scale)
147 {
148 std::memcpy(output_ptr, input_ptr, copy_size);
149 }
150 else
151 {
152 const float scale = input_scales[i] * inverse_output_scale;
153 const float bias = -input_zeropoints[i] * scale;
154 for (int j = 0; j < copy_size; ++j)
155 {
156 const int32_t value =
157 static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
158 output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
159 }
160 }
161 output_ptr += copy_size;
162 }
163 }
164}
165
// Public entry point: dispatches to ConcatImpl<T>::run based on the runtime
// element type of the first input (all inputs are expected to share the same
// element type — the uint8 implementation asserts this per input).
// Precondition: `inputs` is non-empty.
void Concat(const std::vector<std::reference_wrapper<const mir::TensorVariant>> &inputs, int axis,
            mir::TensorVariant &output)
{
  dispatch<ConcatImpl>(inputs[0].get().getElementType(), inputs, axis, output);
}
171
172} // namespace mir_interpreter
const luci_interpreter::RuntimeShape output_shape
void Concat(const std::vector< std::reference_wrapper< const mir::TensorVariant > > &inputs, int axis, mir::TensorVariant &output)
Definition Concat.cpp:166
static void run(const std::vector< std::reference_wrapper< const mir::TensorVariant > > &inputs, int axis, mir::TensorVariant &output)
Definition Concat.cpp:34