ONE - On-device Neural Engine
Loading...
Searching...
No Matches
RoPE.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "kernels/RoPE.h"
18
19#include "kernels/Utils.h"
20
21namespace luci_interpreter
22{
23namespace kernels
24{
25
26RoPE::RoPE(const Tensor *input, const Tensor *sin_table, const Tensor *cos_table, Tensor *output,
27 const RoPEParams &params)
28 : KernelWithParams<RoPEParams>({input, sin_table, cos_table}, {output}, params)
29{
30}
31
33{
34 LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
35 LUCI_INTERPRETER_CHECK(sin_table()->shape().dim(3) == input()->shape().dim(3));
36 LUCI_INTERPRETER_CHECK(cos_table()->shape().dim(3) == input()->shape().dim(3));
37
38 LUCI_INTERPRETER_CHECK(params().mode == RoPEMode::GPT_NEOX);
39
40 output()->resize(input()->shape());
41}
42
43void RoPE::execute() const
44{
45 switch (input()->element_type())
46 {
47 case DataType::FLOAT32:
48 evalFloat();
49 break;
50 default:
51 throw std::runtime_error("luci-rope Unsupported data type.");
52 }
53}
54
55void RoPE::evalFloat() const
56{
57 const auto input_shape = getTensorShape(input());
58 const auto sin_table_shape = getTensorShape(sin_table());
59 const auto cos_table_shape = getTensorShape(cos_table());
61
62 const float *input_data = getTensorData<float>(input());
63 const float *sin_table_data = getTensorData<float>(sin_table());
64 const float *cos_table_data = getTensorData<float>(cos_table());
65 float *output_data = getTensorData<float>(output());
66
67 if (params().mode == RoPEMode::GPT_NEOX)
68 {
69 const int32_t i0_n = input_shape.Dims(0);
70 const int32_t i1_n = input_shape.Dims(1); // multihead
71 const int32_t i2_n = input_shape.Dims(2);
72 const int32_t i3_n = input_shape.Dims(3); // head
73
74 for (int32_t i0 = 0; i0 < i0_n; ++i0)
75 {
76 for (int32_t i1 = 0; i1 < i1_n; ++i1)
77 {
78 for (int32_t i2 = 0; i2 < i2_n; ++i2)
79 {
80 for (int32_t i3 = 0; i3 < i3_n / 2; ++i3)
81 {
82 const int32_t offset = tflite::Offset(input_shape, i0, i1, i2, i3);
83 const float x0 = input_data[offset];
84 const float x1 = input_data[offset + i3_n / 2];
85
86 output_data[offset] = x0 * cos_table_data[i3] - x1 * sin_table_data[i3];
87 output_data[offset + i3_n / 2] =
88 x0 * sin_table_data[i3 + i3_n / 2] + x1 * cos_table_data[i3 + i3_n / 2];
89 }
90 }
91 }
92 }
93 }
94 else
95 throw std::runtime_error("luci-intp RoPE unsupported mode.");
96}
97
98} // namespace kernels
99} // namespace luci_interpreter
void resize(const Shape &new_shape)
Definition Tensor.cpp:56
RoPE(const Tensor *input, const Tensor *sin_table, const Tensor *cos_table, Tensor *output, const RoPEParams &params)
Definition RoPE.cpp:26
void configure() override
Definition RoPE.cpp:32
Tensor * output() const
Definition RoPE.h:37
const Tensor * cos_table() const
Definition RoPE.h:36
void execute() const override
Definition RoPE.cpp:43
const Tensor * sin_table() const
Definition RoPE.h:35
const Tensor * input() const
Definition RoPE.h:34
#define LUCI_INTERPRETER_CHECK(cond)
Definition Utils.h:36
__global uchar * offset(const Image *img, int x, int y)
Definition helpers.h:540
const luci_interpreter::RuntimeShape output_shape
tflite::RuntimeShape getTensorShape(const Tensor *tensor)
Definition Utils.h:194