ONE - On-device Neural Engine
Loading...
Searching...
No Matches
BatchMatMul.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
3 * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef __NNFW_CKER_REFERENCE_BATCH_MATMUL_H__
19#define __NNFW_CKER_REFERENCE_BATCH_MATMUL_H__
20
21#include "cker/Types.h"
22#include "cker/Shape.h"
24
25namespace nnfw
26{
27namespace cker
28{
29namespace reference
30{
31
32inline void BatchMatMul(const BatchMatMulParams &params, const float *lhs_data,
33 const float *rhs_data, float *output_data)
34{
35 for (int b0 = 0; b0 < params.batch_dim0; ++b0)
36 {
37 const float *lhs_ptr0 = lhs_data + (b0 * params.lhs_ext0);
38 const float *rhs_ptr0 = rhs_data + (b0 * params.rhs_ext0);
39 for (int b1 = 0; b1 < params.batch_dim1; ++b1)
40 {
41 const float *lhs_ptr1 = lhs_ptr0 + b1 * params.lhs_ext1;
42 const float *rhs_ptr1 = rhs_ptr0 + b1 * params.rhs_ext1;
43 for (int b2 = 0; b2 < params.batch_dim2; ++b2)
44 {
45 const float *lhs_ptr2 = lhs_ptr1 + b2 * params.lhs_ext2;
46 const float *rhs_ptr2 = rhs_ptr1 + b2 * params.rhs_ext2;
47 float *out_ptr = output_data + ((b0 * params.batch_dim1 * params.batch_dim2) +
48 b1 * params.batch_dim2 + b2) *
49 params.lhs_rows * params.rhs_cols;
50 for (int j = 0; j < params.rhs_cols; ++j)
51 {
52 for (int i = 0; i < params.lhs_rows; ++i)
53 {
54 float total = 0.f;
55 for (int k = 0; k < params.accum_depth; ++k)
56 {
57 total += lhs_ptr2[params.accum_depth * i + k] * rhs_ptr2[j * params.accum_depth + k];
58 }
59 int idx = params.lhs_rows * j + i;
60 out_ptr[idx] = total;
61 }
62 }
63 }
64 }
65 }
66}
67
68} // namespace reference
69} // namespace cker
70} // namespace nnfw
71
72#endif // __NNFW_CKER_REFERENCE_BATCH_MATMUL_H__
Definition topk_v2.h:30