42 for (uint32_t r = 0;
r < tensor_shape.
rank(); ++
r)
59 shape.
rank(node->rank());
60 for (uint32_t r = 0;
r < node->rank(); ++
r)
68 shape.
dim(r) = node->dim(r).known() ? node->dim(r).value() : 1;
86class TensorShapeExpander
97 auto const &input_shape = _shape;
98 uint32_t
const input_rank = input_shape.
rank();
100 assert(input_rank <= output_rank &&
"Cannot shrink rank");
101 uint32_t
const axis_shift = output_rank - input_rank;
106 for (uint32_t axis = 0; axis < output_rank; ++axis)
108 output_shape.dim(axis) = (axis < axis_shift) ? 1 : input_shape.dim(axis - axis_shift);
123 auto x_rank = x.
rank();
124 auto y_rank = y.
rank();
126 if (x_rank == y_rank)
129 TensorShapeExpander x_exp(x);
130 TensorShapeExpander y_exp(y);
132 auto xy_rank = std::max(x_rank, y_rank);
134 x = x_rank > y_rank ? x : x_exp.to(xy_rank);
135 y = y_rank > x_rank ? y : y_exp.to(xy_rank);
145 auto rank = x.
rank();
150 for (uint32_t axis = 0; axis < rank; ++axis)
156 if (!((x_dim == y_dim) || (x_dim == 1 || y_dim == 1)))
157 INTERNAL_EXN(
"Cannot produce expand_dimension of two shapes");
170 expand_rank(x_match, y_match);
180template <loco::DataType T> std::vector<int64_t> vector_from_constant(
luci::CircleConst *const_node)
182 std::vector<int64_t>
result;
184 for (uint32_t idx = 0; idx < const_node->
size<T>(); ++idx)
185 result.push_back(const_node->
at<T>(idx));
190template <
class CIRCLENODE>
loco::NodeShape broadcast_xy(
const CIRCLENODE *node)
192 auto x_shape =
luci::shape_get(node->x()).template as<loco::TensorShape>();
193 auto y_shape =
luci::shape_get(node->y()).template as<loco::TensorShape>();
200#define DECLARE_USE_SINGLE(NAME) \
201 template <class CIRCLENODE> loco::NodeShape use_##NAME(const CIRCLENODE *node) \
203 auto inputs_shape = luci::shape_get(node->NAME()).template as<loco::TensorShape>(); \
204 return loco::NodeShape{inputs_shape}; \
212#undef DECLARE_USE_SINGLE
214template <
class CIRCLENODE>
220 auto input_shape =
luci::shape_get(node->input()).template as<loco::TensorShape>();
223 LUCI_ASSERT(paddings->dtype() == S32 || paddings->dtype() == S64,
"Support int 32/64 for now");
224 LUCI_ASSERT(paddings->rank() == 2,
"paddings should be rank 2")
226 int32_t n = paddings->dim(0).value();
227 int32_t v = paddings->dim(1).value();
231 "paddings [n, 2] should have same value of input rank");
236 for (int32_t ni = 0; ni < n; ++ni)
238 int32_t idx = ni * 2;
239 int value = input_shape.dim(ni).value();
240 if (paddings->dtype() == S32)
242 value += paddings->
at<S32>(idx + 0);
243 value += paddings->
at<S32>(idx + 1);
247 auto pl = paddings->
at<
S64>(idx + 0);
248 auto pr = paddings->
at<
S64>(idx + 1);
249 auto max =
static_cast<int64_t
>(std::numeric_limits<int32_t>::max());
250 auto low =
static_cast<int64_t
>(std::numeric_limits<int32_t>::lowest());
251 LUCI_ASSERT(pl <= max,
"paddings is over 32 bit limit");
252 LUCI_ASSERT(pl >= low,
"paddings is over 32 bit limit");
253 LUCI_ASSERT(pr <= max,
"paddings is over 32 bit limit");
254 LUCI_ASSERT(pr >= low,
"paddings is over 32 bit limit");
255 value +=
static_cast<int32_t
>(pl);
256 value +=
static_cast<int32_t
>(pr);
268 for (uint32_t idx = 1; idx < node->
arity(); ++idx)
271 if (!(shape == shape_idx))
279template <
class CIRCLENODE>
loco::NodeShape infer_arg_maxmin(
const CIRCLENODE *node)
281 auto input_shape =
luci::shape_get(node->input()).template as<loco::TensorShape>();
282 auto dimension_shape =
luci::shape_get(node->dimension()).template as<loco::TensorShape>();
284 int64_t select_axis = 0;
286 LUCI_ASSERT(node->dimension(),
"2nd input dimension() should not be nullptr");
290 auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
291 LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
292 "Only support int32 CircleConst for CircleArgMax/CircleArgMin");
294 if (const_shape_node->rank() > 1)
298 select_axis = const_shape_node->template scalar<loco::DataType::S32>();
301 assert(select_axis < input_shape.rank());
304 select_axis +=
static_cast<int64_t
>(input_shape.rank());
308 uint32_t rank = input_shape.
rank();
309 uint32_t shrink =
static_cast<uint32_t
>(select_axis);
311 shape_output.
rank(rank - 1);
312 for (uint32_t r = 0, d = 0;
r < rank; ++
r)
316 shape_output.
dim(d++) = input_shape.dim(r);
322template <
class Pool2DType>
loco::NodeShape infer_pool_2d_shape(
const Pool2DType *node)
324 auto ifm_shape =
luci::shape_get(node->value()).template as<loco::TensorShape>();
325 assert(ifm_shape.rank() == 4);
326 assert(ifm_shape.dim(1).known());
327 assert(ifm_shape.dim(2).known());
329 uint32_t input_height = ifm_shape.dim(1).value();
330 uint32_t input_width = ifm_shape.dim(2).value();
331 uint32_t stride_height = node->stride()->h();
332 uint32_t stride_width = node->stride()->w();
333 uint32_t window_height = node->filter()->h();
334 uint32_t window_width = node->filter()->w();
335 uint32_t dilation_height = 1;
336 uint32_t dilation_width = 1;
337 uint32_t effective_window_height = dilation_height * (window_height - 1) + 1;
338 uint32_t effective_window_width = dilation_width * (window_width - 1) + 1;
340 uint32_t output_height = 0;
341 uint32_t output_width = 0;
345 LUCI_ASSERT(input_height + stride_height > effective_window_height,
"Invalid shape");
346 LUCI_ASSERT(input_width + stride_width > effective_window_width,
"Invalid shape");
347 output_height = (input_height + stride_height - effective_window_height) / stride_height;
348 output_width = (input_width + stride_width - effective_window_width) / stride_width;
352 output_height = (input_height + stride_height - 1) / stride_height;
353 output_width = (input_width + stride_width - 1) / stride_width;
360 ofm_shape.
dim(0) = ifm_shape.dim(0);
361 ofm_shape.
dim(1) = output_height;
362 ofm_shape.
dim(2) = output_width;
363 ofm_shape.
dim(3) = ifm_shape.dim(3);
374 assert(input_shape.rank() == 3 || input_shape.rank() == 4);
377 auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->
block_shape());
378 LUCI_ASSERT(const_block_shape->dtype() == loco::DataType::S32,
"Only support int32 block_shape");
381 auto const_crops = loco::must_cast<luci::CircleConst *>(node->
crops());
382 LUCI_ASSERT(const_crops->dtype() == loco::DataType::S32,
"Only support int32 crops");
386 assert(const_block_shape_shape.rank() == 1);
387 assert(const_crops_shape.
rank() == 2);
389 int32_t input_spatial_dim = input_shape.rank() - 2;
390 assert(const_block_shape_shape.dim(0) == input_spatial_dim);
391 assert(const_crops_shape.
dim(0) == input_spatial_dim);
392 assert(const_crops_shape.
dim(1) == 2);
396 shape_output.
rank(input_shape.rank());
398 int32_t output_batch_size = input_shape.dim(0).value();
399 for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
401 int dim_size = input_shape.dim(dim + 1).value() * const_block_shape->at<
S32>(dim);
402 dim_size -= const_crops->at<
S32>(dim * 2);
403 dim_size -= const_crops->at<
S32>(dim * 2 + 1);
404 shape_output.
dim(dim + 1) = dim_size;
406 assert(output_batch_size % const_block_shape->at<S32>(dim) == 0);
407 output_batch_size = output_batch_size / const_block_shape->at<
S32>(dim);
409 shape_output.
dim(0) = output_batch_size;
410 shape_output.
dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
421template <
class Conv2DType> OutputSize infer_conv2d_type(
const Conv2DType *node)
423 auto ifm_shape =
luci::shape_get(node->input()).template as<loco::TensorShape>();
424 auto ker_shape =
luci::shape_get(node->filter()).template as<loco::TensorShape>();
425 assert(ifm_shape.rank() == 4);
426 assert(ker_shape.rank() == 4);
427 assert(ifm_shape.dim(1).known());
428 assert(ifm_shape.dim(2).known());
429 assert(ker_shape.dim(1).known());
430 assert(ker_shape.dim(2).known());
432 uint32_t input_height = ifm_shape.dim(1).value();
433 uint32_t input_width = ifm_shape.dim(2).value();
434 uint32_t stride_height = node->stride()->h();
435 uint32_t stride_width = node->stride()->w();
436 uint32_t ker_height = ker_shape.dim(1).value();
437 uint32_t ker_width = ker_shape.dim(2).value();
438 uint32_t dilation_height = node->dilation()->h();
439 uint32_t dilation_width = node->dilation()->w();
440 uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1;
441 uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1;
443 uint32_t output_height = 0;
444 uint32_t output_width = 0;
448 LUCI_ASSERT(input_height + stride_height > effective_ker_height,
"Invalid shape");
449 LUCI_ASSERT(input_width + stride_width > effective_ker_width,
"Invalid shape");
450 output_height = (input_height + stride_height - effective_ker_height) / stride_height;
451 output_width = (input_width + stride_width - effective_ker_width) / stride_width;
455 output_height = (input_height + stride_height - 1) / stride_height;
456 output_width = (input_width + stride_width - 1) / stride_width;
461 OutputSize os{output_height, output_width};
476 if (const_shape_node !=
nullptr)
478 LUCI_ASSERT(const_shape_node->dtype() == S32,
"Only support int32 CircleConst");
480 shape_by_input.
rank(const_shape_node->size<S32>());
481 for (uint32_t axis = 0; axis < shape_by_input.
rank(); ++axis)
483 shape_by_input.
dim(axis) = const_shape_node->at<
S32>(axis);
489 shape_by_input = own_shape(node);
503 assert(ifm_shape.rank() == 4);
504 assert(ker_shape.
rank() == 4);
505 assert(ifm_shape.dim(3) == ker_shape.
dim(3));
507 auto os = infer_conv2d_type(node);
511 ofm_shape.
dim(0) = ifm_shape.dim(0);
512 ofm_shape.
dim(1) = os.height;
513 ofm_shape.
dim(2) = os.width;
514 ofm_shape.
dim(3) = ker_shape.
dim(0);
516 INFO(l) <<
"[luci] CircleConv2D ShapeInf ifm(" << ifm_shape.rank() <<
") ker(" << ker_shape.
rank()
517 <<
") output(" << ofm_shape.
dim(0).
value() <<
"," << ofm_shape.
dim(1).
value() <<
","
527 LUCI_ASSERT(input_shape.rank() == 4,
"Only input rank 4 is supported");
531 int32_t height = input_shape.dim(1).value();
532 int32_t width = input_shape.dim(2).value();
533 int32_t depth = input_shape.dim(3).value();
540 if (depth % (block_size * block_size))
542 INTERNAL_EXN(
"The input tensor's depth must be divisible by block_size^2");
551 output_shape.dim(3) = depth / (block_size * block_size);
561 assert(ifm_shape.rank() == 4);
562 assert(ker_shape.
rank() == 4);
563 assert(ker_shape.
dim(0).
value() == 1);
566 auto os = infer_conv2d_type(node);
570 ofm_shape.
dim(0) = ifm_shape.dim(0);
571 ofm_shape.
dim(1) = os.height;
572 ofm_shape.
dim(2) = os.width;
573 ofm_shape.
dim(3) = ker_shape.
dim(3);
582 if (x_shape.rank() == 0)
585 return use_own(node);
587 auto const_axis = loco::must_cast<luci::CircleConst *>(node->
axis());
588 LUCI_ASSERT(const_axis->dtype() == S32,
"Only support int32 CircleConst for axis");
589 if (const_axis->rank() != 0 && const_axis->rank() != 1)
593 int32_t axis = const_axis->at<
S32>(0);
594 LUCI_ASSERT((axis <=
static_cast<int32_t
>(x_shape.rank())) &&
595 (axis >= -1 -
static_cast<int32_t
>(x_shape.rank())),
596 "Axis has to be between [-(D+1), D], where D is rank of input.");
597 size_t positive_axis = axis < 0 ? x_shape.rank() + axis + 1 : axis;
601 for (; i < positive_axis; i++)
604 for (; i < x_shape.rank(); i++)
616 if (dims_node !=
nullptr)
619 LUCI_ASSERT(dims_node->dtype() == loco::DataType::S32,
"Only support int32 CircleConst");
621 if (dims_node->rank() != 1)
624 shape.
rank(dims_node->dim(0).value());
626 for (uint32_t axis = 0; axis < shape.
rank(); ++axis)
628 shape.
dim(axis) = dims_node->at<loco::DataType::S32>(axis);
633 shape = own_shape(node);
646 int32_t axis = node->
axis();
650 if (input_shape.rank() == 0 || positions_shape.
rank() == 0)
651 return use_own(node);
654 axis += input_shape.rank();
657 int32_t outdim_index = 0;
658 for (int32_t i = 0; i < axis; ++i)
660 for (uint32_t i = 0; i < positions_shape.
rank(); ++i)
662 for (uint32_t i = axis + 1; i < input_shape.rank(); ++i)
675 const auto params_rank = params_shape.
rank();
676 const auto indices_rank = indices_shape.
rank();
684 if (!indices_shape.
dim(indices_rank - 1).
known())
687 auto indices_last_dim = indices_shape.
dim(indices_rank - 1).
value();
689 if (indices_last_dim > params_rank)
690 INTERNAL_EXN(
"Last indices dimension should be <= params rank");
692 const uint32_t output_rank = indices_rank + params_rank - indices_last_dim - 1;
696 uint32_t output_index = 0;
697 for (uint32_t i = 0; i < indices_rank - 1; ++i)
699 auto &dim = indices_shape.
dim(i);
701 INTERNAL_EXN(
"Unknown indices dimension is unsupported");
705 for (uint32_t i = indices_last_dim; i < params_rank; ++i)
707 auto &dim = params_shape.dim(i);
709 INTERNAL_EXN(
"Unknown params dimension is unsupported");
721 auto rank = diagonal_shape.
rank();
725 for (uint32_t i = 0; i < rank; i++)
740 auto rank = diagonal_shape.
rank();
742 LUCI_ASSERT(rank == input_shape.rank() - 1,
"diagonal rank = input rank - 1");
744 for (uint32_t i = 0; i < rank - 1; i++)
746 LUCI_ASSERT(diagonal_shape.
dim(i) == input_shape.dim(i),
"diagonal dims = input dims");
749 auto dim = std::min(input_shape.dim(rank - 1).value(), input_shape.dim(rank).value());
751 LUCI_ASSERT(dim == diagonal_shape.
dim(rank - 1),
"Max diag len error");
761 auto reduction_indices = loco::must_cast<const luci::CircleConst *>(indices);
766 LUCI_ASSERT(reduction_indices->dtype() == S32,
"Only support int 32");
769 std::vector<int32_t> reduction_values;
771 for (uint32_t i = 0; i < reduction_indices->size<
S32>(); ++i)
773 int32_t axis = reduction_indices->at<
S32>(i);
775 axis += input_shape.rank();
776 if (not(0 <= axis and axis <
static_cast<int32_t
>(input_shape.rank())))
778 reduction_values.push_back(axis);
786 for (uint32_t i = 0; i < input_shape.rank(); ++i)
788 for (uint32_t i = 0; i < reduction_values.size(); ++i)
793 std::vector<bool> check_reduce(input_shape.rank(),
false);
794 for (uint32_t i = 0; i < reduction_values.size(); ++i)
795 check_reduce.at(reduction_values.at(i)) =
true;
797 uint32_t reduce_cnt = 0;
798 for (uint32_t i = 0; i < check_reduce.size(); ++i)
799 if (check_reduce.at(i))
803 for (uint32_t i = 0, j = 0; i < check_reduce.size(); ++i)
804 if (check_reduce.at(i) ==
false)
814 auto paddings = loco::must_cast<luci::CircleConst *>(node->
paddings());
815 return use_paddings(node, paddings);
824 auto depth = loco::must_cast<luci::CircleConst *>(node->
depth());
825 LUCI_ASSERT(depth->dtype() == S32,
"Only support int32 CircleConst");
826 if (depth->
rank() != 0)
830 auto axis = node->
axis();
832 axis += indices_shape.
rank() + 1;
834 LUCI_ASSERT(
static_cast<uint32_t
>(axis) <= indices_shape.
rank(),
"Axis is out of range");
838 if (i ==
static_cast<uint32_t
>(axis))
860 "All inputs must have the same shape");
867 auto axis = node->
axis();
869 axis += first_shape.rank() + 1;
872 LUCI_ASSERT(
static_cast<uint32_t
>(axis) <= first_shape.rank(),
"Axis is out of range");
880 if (i ==
static_cast<uint32_t
>(axis))
902 return use_paddings(node, paddings);
915template <
class CIRCLENODE>
loco::NodeShape infer_resize_type(
const CIRCLENODE *node)
917 auto input_shape =
luci::shape_get(node->input()).template as<loco::TensorShape>();
919 if (input_shape.rank() != 4)
922 auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
924 if (const_node->dtype() != loco::DataType::S32)
925 INTERNAL_EXN(
"Only S32 datatype is supported for size");
927 if (const_node->rank() != 1)
930 if (const_node->dim(0).value() != 2)
936 output_shape.dim(1) = const_node->template at<loco::DataType::S32>(0);
937 output_shape.dim(2) = const_node->template at<loco::DataType::S32>(1);
947 auto shape_node = loco::must_cast<luci::CircleConst *>(node->
shape());
952 std::vector<int64_t> vect_shape;
954 if (shape_node->dtype() == S32)
955 vect_shape = vector_from_constant<S32>(shape_node);
956 else if (shape_node->dtype() == S64)
957 vect_shape = vector_from_constant<S64>(shape_node);
959 LUCI_ASSERT(
false,
"Only support int32/int64 for shape()");
962 for (uint32_t i = 0; i < vect_shape.size(); ++i)
973 LUCI_ASSERT(segment_shape.
rank() == 1,
"segment_ids must be 1-D tensor");
975 "segment_ids size must be equal to the size of data's first dimension");
977 auto ids_shape_value = loco::must_cast<luci::CircleConst *>(node->
segment_ids());
979 std::vector<int64_t> vect_ids;
981 if (ids_shape_value->dtype() == loco::DataType::S32)
982 vect_ids = vector_from_constant<loco::DataType::S32>(ids_shape_value);
984 LUCI_ASSERT(std::is_sorted(vect_ids.begin(), vect_ids.end()),
985 "segment_ids values should be sorted")
991 for (uint32_t i = 1; i < input_shape.rank(); ++i)
1006 if (c_shape.rank() != t_shape.rank())
1008 if (c_shape.rank() != 0 && c_shape.rank() != 1)
1009 INTERNAL_EXN_V(
"CircleSelect condition rank is not 0 nor 1: ", c_shape.rank());
1011 if (c_shape.rank() == 1)
1013 if (c_shape.dim(0).value() != t_shape.dim(0).value())
1014 INTERNAL_EXN(
"CircleSelect condition dim(0) should match with t.dim(0)");
1051 auto const_begin = loco::must_cast<luci::CircleConst *>(node->
begin());
1052 auto const_size = loco::must_cast<luci::CircleConst *>(node->
size());
1055 std::vector<int64_t> vect_begin;
1056 std::vector<int64_t> vect_size;
1058 if (const_begin->dtype() == S32)
1059 vect_begin = vector_from_constant<S32>(const_begin);
1060 else if (const_begin->dtype() == S64)
1061 vect_begin = vector_from_constant<S64>(const_begin);
1063 LUCI_ASSERT(
false,
"Only support int32/int64 for begin()");
1065 if (const_size->dtype() == S32)
1066 vect_size = vector_from_constant<S32>(const_size);
1067 else if (const_size->dtype() == S64)
1068 vect_size = vector_from_constant<S64>(const_size);
1070 LUCI_ASSERT(
false,
"Only support int32/int64 for size()");
1072 assert(input_shape.rank() == vect_begin.size());
1073 assert(input_shape.rank() == vect_size.size());
1076 for (uint32_t idx = 0; idx < vect_begin.size(); ++idx)
1078 auto size = vect_size.at(idx);
1081 size =
static_cast<int64_t
>(input_shape.dim(idx).value()) - vect_begin.at(idx);
1095 assert(input_shape.rank() == 3 || input_shape.rank() == 4);
1098 auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->
block_shape());
1099 LUCI_ASSERT(const_block_shape->dtype() == S32,
"Only support int32 block_shape");
1102 auto const_paddings = loco::must_cast<luci::CircleConst *>(node->
paddings());
1103 LUCI_ASSERT(const_paddings->dtype() == S32,
"Only support int32 paddings");
1107 assert(const_block_shape_shape.rank() == 1);
1108 assert(const_paddings_shape.
rank() == 2);
1110 int32_t input_spatial_dim = input_shape.rank() - 2;
1111 assert(const_block_shape_shape.dim(0) == input_spatial_dim);
1112 assert(const_paddings_shape.
dim(0) == input_spatial_dim);
1113 assert(const_paddings_shape.
dim(1) == 2);
1116 uint32_t ele_count = const_block_shape->size<
S32>();
1117 for (uint32_t e = 0; e < ele_count; ++e)
1119 auto val = const_block_shape->at<
S32>(e);
1128 shape_output.
rank(input_shape.rank());
1130 int32_t output_batch_size = input_shape.dim(0).value();
1131 for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
1133 int dim_size = input_shape.dim(dim + 1).value();
1134 dim_size += const_paddings->at<
S32>(dim * 2);
1135 dim_size += const_paddings->at<
S32>(dim * 2 + 1);
1136 shape_output.
dim(dim + 1) = dim_size / const_block_shape->at<
S32>(dim);
1138 assert(dim_size % const_block_shape->at<S32>(dim) == 0);
1139 output_batch_size = output_batch_size * const_block_shape->at<
S32>(dim);
1141 shape_output.
dim(0) = output_batch_size;
1142 shape_output.
dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
1150 LUCI_ASSERT(input_shape.rank() == 4,
"Only input rank 4 is supported");
1153 int32_t height = input_shape.dim(1).value();
1154 int32_t width = input_shape.dim(2).value();
1155 int32_t depth = input_shape.
dim(3).
value();
1162 if ((height % block_size) || (width % block_size))
1164 INTERNAL_EXN(
"The input tensor's height and width must be divisible by block_size");
1185 if (output_shape_node !=
nullptr)
1187 const auto output_shape_type = output_shape_node->dtype();
1189 if (output_shape_node->rank() != 1)
1193 if (output_shape_type == loco::DataType::S32)
1195 shape.
rank(output_shape_node->size<loco::DataType::S32>());
1197 for (uint32_t axis = 0; axis < shape.
rank(); ++axis)
1199 shape.
dim(axis) = output_shape_node->at<loco::DataType::S32>(axis);
1202 else if (output_shape_type == loco::DataType::S64)
1204 shape.
rank(output_shape_node->size<loco::DataType::S64>());
1206 for (uint32_t axis = 0; axis < shape.
rank(); ++axis)
1208 shape.
dim(axis) = output_shape_node->at<loco::DataType::S64>(axis);
1213 INTERNAL_EXN(
"Output shape of SparseToDense must be either int32 or int64");
1218 shape = own_shape(node);
1230 std::vector<bool> do_squeeze(input_shape.rank(),
false);
1231 uint32_t num_squeezed = 0;
1236 for (int32_t raw_dim : node->squeeze_dims())
1238 int32_t dim = raw_dim < 0 ? raw_dim + input_shape.rank() : raw_dim;
1240 if (dim < 0 ||
static_cast<uint32_t
>(dim) >= input_shape.rank() ||
1241 input_shape.dim(dim).value() != 1)
1243 INTERNAL_EXN(
"invalid dimention specified to Squeeze");
1246 if (!do_squeeze[dim])
1248 do_squeeze[dim] =
true;
1254 for (uint32_t dim = 0; dim < input_shape.rank(); ++dim)
1256 if (input_shape.dim(dim) == 1)
1258 do_squeeze[dim] =
true;
1267 for (uint32_t in_dim = 0, out_dim = 0; in_dim < input_shape.rank(); ++in_dim)
1269 if (!do_squeeze[in_dim])
1283 assert(ifm_shape.rank() == 2);
1284 assert(weight_feature_shape.
rank() == 2);
1286 assert(ifm_shape.dim(1) == weight_feature_shape.
dim(1));
1287 assert(weight_feature_shape.
dim(0).
known());
1290 const auto num_filters = weight_feature_shape.
dim(0).
value();
1291 assert(num_filters % rank == 0);
1292 const auto num_units = num_filters / rank;
1296 ofm_shape.
dim(0) = ifm_shape.dim(0);
1297 ofm_shape.
dim(1) = num_units;
1307 auto multiples = loco::must_cast<luci::CircleConst *>(node->
multiples());
1311 LUCI_ASSERT(multiples->dtype() == S32,
"Only support int32 multiples");
1314 uint32_t n = multiples->dim(0).value();
1316 LUCI_ASSERT(n == input_shape.rank(), "length of multiples should be the same with input rank");
1321 for (uint32_t ni = 0; ni < n; ++ni)
1323 int32_t multiple = multiples->at<
S32>(ni);
1324 output_shape.dim(ni) = input_shape.dim(ni).value() *
static_cast<uint32_t
>(multiple);
1334 auto perm_node = loco::must_cast<luci::CircleConst *>(node->
perm());
1339 assert(perm_node->dtype() == loco::DataType::S32);
1340 assert(input_shape.rank() == perm_node->template size<loco::DataType::S32>());
1342 for (uint32_t out_axis = 0; out_axis <
output_shape.rank(); out_axis++)
1344 auto in_axis = perm_node->template at<loco::DataType::S32>(out_axis);
1355 if (not input_sizes_const)
1356 return use_own(node);
1358 LUCI_ASSERT(input_sizes_const->dtype() == loco::DataType::S32,
"Only support S32 dtype")
1359 LUCI_ASSERT(input_sizes_const->rank() == 1 && input_sizes_const->dim(0).value() == 4,
1360 "Only support rank 1 with 4 entries")
1362 loco::TensorShape shape;
1365 for (uint32_t axis = 0; axis < 4; ++axis)
1366 shape.dim(axis) = input_sizes_const->at<
loco::
DataType::S32>(axis);
1368 return
loco::NodeShape{shape};
1378 auto axis = node->
axis();
1379 auto num = node->
num();
1380 auto rank =
static_cast<int32_t
>(value_shape.rank());
1385 return use_own(node);
1388 LUCI_ASSERT(-rank <= axis && axis < rank,
"Axis is out of range");
1393 LUCI_ASSERT(num ==
static_cast<int32_t
>(value_shape.dim(axis).value()),
1394 "num, axis maybe incorrect");
1399 for (int32_t i = 0, o = 0; i < rank; ++i)
1411 auto recurrent_to_output_weights =
1413 auto rank = input_shape.
rank();
1416 for (uint32_t i = 0; i < rank - 1; i++)
1428 assert(input_shape.rank() == 1);
1431 shape_output = own_shape(node);
1442 auto weights_clusters = loco::must_cast<luci::CircleConst *>(node->
weights_clusters());
1444 LUCI_ASSERT(input_shape.rank() == 2,
"Input rank of BCQFullyConnected should be 2");
1446 int32_t qbits_sum = 0;
1447 for (uint32_t i = 0; i < weights_clusters->
dim(0).value(); ++i)
1449 qbits_sum += weights_clusters->at<loco::DataType::S32>(i * 2 + 1);
1453 out_shape.
dim(0) = qbits_sum;
1454 out_shape.
dim(1) = input_shape.dim(1);
1466 auto axis = node->
axis();
1468 auto input_clusters = loco::must_cast<luci::CircleConst *>(node->
input_clusters());
1470 for (uint32_t i = 0; i < input_clusters->dim(0).value(); ++i)
1472 qbits_sum += input_clusters->at<loco::DataType::S32>(i * 2 + 1);
1475 input_shape.
rank(2);
1476 input_shape.
dim(0) = qbits_sum;
1477 input_shape.
dim(1) = input_binary_shape.dim(1).
value() * 32;
1480 int32_t outdim_index = 0;
1481 for (int32_t i = 0; i < axis; ++i)
1483 for (uint32_t i = 0; i < indices_shape.
rank(); ++i)
1485 for (uint32_t i = axis + 1; i < input_shape.
rank(); ++i)
1498 auto rank = input_shape.
rank();
1501 for (uint32_t i = 0; i < rank - 1; i++)
1518 shape.
rank(node->rank());
1519 for (uint32_t axis = 0; axis < node->rank(); axis++)
1520 shape.
dim(axis) = node->dim(axis);
1528 auto graph_output = graph_outputs->
at(node->
index());
1539 if (nmsv4 ==
nullptr)
1540 INTERNAL_EXN(
"CircleNonMaxSuppressionV4 IR is not configured correctly");
1549 auto max_output_size =
dynamic_cast<const luci::CircleConst *
>(nmsv4->max_output_size());
1550 if (max_output_size ==
nullptr)
1553 LUCI_ASSERT(max_output_size->dtype() == S32,
"Only support int32 for max_output_size");
1555 if (max_output_size->size<S32>() < 1)
1558 auto max_output_size_value = uint32_t(max_output_size->at<S32>(0));
1567 if (nmsv5 ==
nullptr)
1568 INTERNAL_EXN(
"CircleNonMaxSuppressionV5 IR is not configured correctly");
1574 assert(index == 0 || index == 1);
1577 auto max_output_size =
dynamic_cast<const luci::CircleConst *
>(nmsv5->max_output_size());
1578 if (max_output_size ==
nullptr)
1581 LUCI_ASSERT(max_output_size->dtype() == S32,
"Only support int32 for max_output_size");
1583 if (max_output_size->size<S32>() < 1)
1586 auto max_output_size_value = uint32_t(max_output_size->at<S32>(0));
1595 if (split ==
nullptr)
1596 INTERNAL_EXN(
"CircleSplit IR is not configured correctly");
1603 if (split_dim ==
nullptr)
1605 LUCI_ASSERT(split_dim->dtype() == S32,
"Only support int32 for split_dim");
1607 assert(split_dim->size<S32>() == 1);
1608 auto split_dim_axis = split_dim->at<
S32>(0);
1609 if (split_dim_axis < 0)
1610 split_dim_axis += split_shape.rank();
1612 auto split_dim_value = split_shape.dim(split_dim_axis).value();
1613 assert(split_dim_value %
split->num_split() == 0);
1614 const int split_depth = split_dim_value /
split->num_split();
1629 if (split ==
nullptr)
1630 INTERNAL_EXN(
"CircleSplit IR is not configured correctly");
1637 if (size_splits ==
nullptr)
1639 LUCI_ASSERT(size_splits->dtype() == S32,
"Only support int32 for size_splits");
1642 if (split_dim ==
nullptr)
1644 LUCI_ASSERT(split_dim->dtype() == S32,
"Only support int32 for split_dim");
1647 assert(split_dim->size<S32>() == 1);
1648 auto split_dim_axis = split_dim->at<
S32>(0);
1649 if (split_dim_axis < 0)
1650 split_dim_axis += split_shape.rank();
1653 int32_t size_splits_count =
static_cast<int32_t
>(size_splits->size<
S32>());
1654 assert(size_splits_count ==
split->num_split());
1656 int64_t minus_one_count = 0, size_splits_sum = 0;
1657 for (int32_t idx = 0; idx < size_splits_count; ++idx)
1659 auto size = size_splits->at<
S32>(idx);
1664 size_splits_sum +=
size;
1666 if (minus_one_count > 1)
1667 INTERNAL_EXN(
"CircleSplitV size_splits has more than two -1 values");
1670 auto input_size = split_shape.dim(split_dim_axis).value();
1671 assert(size_splits_sum <= input_size);
1673 auto index_this = node->
index();
1674 assert(0 <= index_this && index_this < split->num_split());
1675 auto split_depth = size_splits->at<
S32>(index_this);
1676 if (split_depth == -1)
1677 split_depth =
static_cast<int32_t
>(input_size) -
static_cast<int32_t
>(size_splits_sum);
1691 if (topkv2 ==
nullptr)
1692 INTERNAL_EXN(
"CircleSplit IR is not configured correctly");
1697 auto node_k = loco::must_cast<const luci::CircleConst *>(topkv2->k());
1698 LUCI_ASSERT(node_k->dtype() == S32,
"Only support Int32");
1699 assert(node_k->size<S32>() == 1);
1704 for (uint32_t idx = 0; idx < input_shape.
rank() - 1; ++idx)
1715 if (node->
index() == 0)
1717 auto unique_shape = own_shape(node);
1720 assert(node->
index() == 1);
1721 auto unique = loco::must_cast<luci::CircleUnique *>(node->
input());
1724 assert(unique_shape.rank() == 1);
1727 shape_output.
rank(1);
1728 shape_output.
dim(0) = unique_shape.dim(0);
1735 if (unpack ==
nullptr)
1737 INTERNAL_EXN(
"CircleUnpack IR is not configured correctly");
1752 if (circle_while ==
nullptr)
1754 INTERNAL_EXN(
"CircleWhile IR is not configured correctly");
1758 auto cond_graph = circle_while->cond_graph();
1759 assert(cond_graph !=
nullptr);
1764 auto cond_in = loco::must_cast<luci::CircleInput *>(cond_inputs.at(index));
1766 auto cond_graph_inputs = cond_graph->inputs();
1767 auto cond_graph_input = cond_graph_inputs->at(cond_in->index());
1769 const auto &cond_graph_input_shape = *cond_graph_input->shape();
1770 auto this_shape = own_shape(node);
1772 if (!(this_shape == cond_graph_input_shape))
1775 WARN(l) <<
"Warning: CircleWhileOut '" << node->
name() <<
"' shape mispatch " << this_shape
1776 <<
" vs " << cond_graph_input_shape;
1800 return infer_pool_2d_shape(node);
1805 return infer_batch_to_space_nd(node);
1810 return infer_broadcast_to(node);
1831 return infer_depth_to_space(node);
1836 return infer_depthwise_conv2d(node);
1858 return infer_expand_dims(node);
1896 assert(node->input_count() > 0);
1905 return infer_pool_2d_shape(node);
1936 return infer_matrix_diag(node);
1941 return infer_matrix_set_diag(node);
1948 return infer_pool_2d_shape(node);
1953 auto output_shape = infer_reducer(node->
input(), node->reduction_indices(), node->keep_dims());
1988 shape_output.
rank(0);
1995 auto output_shape = infer_reducer(node->
input(), node->reduction_indices(), node->keep_dims());
2001 auto output_shape = infer_reducer(node->
input(), node->reduction_indices(), node->keep_dims());
2007 auto output_shape = infer_reducer(node->
input(), node->reduction_indices(), node->keep_dims());
2013 auto output_shape = infer_reducer(node->
input(), node->reduction_indices(), node->keep_dims());
2047 return infer_resize_type(node);
2052 return infer_resize_type(node);
2069 "Tensor must be 1-D");
2078 return infer_segment_sum(node);
2093 return infer_space_to_batch_nd(node);
2098 return infer_space_to_depth(node);
2103 return infer_sparse_to_dense(node);
2126 return broadcast_xy(node);
2135 auto output_shape = infer_reducer(node->
input(), node->reduction_indices(), node->keep_dims());
2156 return infer_transpose_conv(node);
2163 return infer_unidirectionalsequencelstm(node);
2173 assert(node->
arity() > 0);
2188 return infer_bcq_fully_connected(node);
2229 return infer_non_max_suppression_v4_out(node);
2234 return infer_non_max_suppression_v5_out(node);
2243 return infer_top_k_v2_out(node);
2271 ShapeInferenceAlgorithm alg;
2272 auto circle_node = loco::must_cast<const CircleNode *>(node);
2276 bool is_scalar = (circle_node->rank() == 0);
2278 if (is_shape_undefined)
2279 shape = circle_node->accept(&alg);
2283 shape = own_shape(circle_node);
2285 shape = circle_node->accept(&alg);
2288 VERBOSE(l, 1) <<
"[luci] shape: " << circle_node->name();
2289 VERBOSE(l, 1) <<
" own_shape: " << own_shape(circle_node)
#define INTERNAL_EXN(msg)
@ brief throw internal exception with message
#define INTERNAL_EXN_V(msg, val)
@ brief throw internal exception with message and value
std::ostream & operator<<(std::ostream &os, const circledump::ModelEx &model)
The value of one dimension in a tensor shape.
uint32_t value(void) const
Return the value.
bool known(void) const
Return whether the value is known (or not)
OutputContext * outputs(void)
Logical unit of computation.
virtual const Dialect * dialect(void) const =0
Return "Dialect" identifier that this node belongs to.
T * at(uint32_t n) const
Access N-th object.
const Dimension & dim(uint32_t axis) const
uint32_t rank(void) const
Node * inputs(uint32_t index) const
AVERAGE_POOL_2D in Circle.
BCQ_FULLY_CONNECTED in Circle.
loco::Node * input(void) const
loco::Node * weights_clusters(void) const
loco::Node * input_binary(void) const
loco::Node * input_clusters(void) const
loco::Node * indices(void) const
BATCH_TO_SPACE_ND in Circle.
loco::Node * input(void) const
loco::Node * crops(void) const
loco::Node * block_shape(void) const
loco::Node * shape(void) const
Class to build tensor data.
const loco::DataTypeImpl< DT >::Type & at(uint32_t n) const
uint32_t size(void) const
loco::Node * filter(void) const
loco::Node * input(void) const
Virtual CIRCLECUSTOMOUT in Circle.
DEPTH_TO_SPACE in Circle.
int32_t block_size(void) const
loco::Node * input(void) const
DEPTHWISE_CONV_2D in Circle.
loco::Node * filter(void) const
int32_t depthMultiplier(void) const
loco::Node * input(void) const
static loco::Dialect * get(void)
loco::Node * axis(void) const
loco::Node * input(void) const
loco::Node * dims(void) const
loco::Node * input(void) const
loco::Node * state(void) const
bool returnSequences() const
loco::Node * params(void) const
loco::Node * indices(void) const
loco::Node * params(void) const
loco::Node * indices(void) const
L2_NORMALIZATION in Circle.
LOCAL_RESPONSE_NORMALIZATION in Circle.
loco::Node * diagonal(void) const
MATRIX_SET_DIAG in Circle.
loco::Node * diagonal(void) const
loco::Node * input(void) const
loco::Node * paddings(void) const
NON_MAX_SUPPRESSION_V4 in Circle.
Virtual NONMAXSUPPRESSIONV4OUT in Circle.
int32_t index(void) const
loco::Node * input(void) const
NON_MAX_SUPPRESSION_V5 in Circle.
Virtual NONMAXSUPPRESSIONV5OUT in Circle.
loco::Node * input(void) const
int32_t index(void) const
loco::Node * depth(void) const
loco::Node * indices(void) const
Temporary DummyNode used with a dangling CircleNode.
CircleOutputExclude is used to specify nodes that are not exported.
CircleNode for Output of the Graph.
void index(const loco::GraphOutputIndex &index)
loco::Node * input(void) const
loco::Node * alpha(void) const
Node * values(uint32_t index) const
uint32_t values_count(void) const
loco::Node * paddings(void) const
RESIZE_BILINEAR in Circle.
RESIZE_NEAREST_NEIGHBOR in Circle.
REVERSE_SEQUENCE in Circle.
loco::Node * input(void) const
loco::Node * weight_feature(void) const
int32_t svdf_rank() const
loco::Node * shape(void) const
loco::Node * segment_ids(void) const
loco::Node * input(void) const
loco::Node * e(void) const
loco::Node * condition(void) const
loco::Node * t(void) const
loco::Node * e(void) const
loco::Node * t(void) const
loco::Node * condition(void) const
loco::Node * input(void) const
loco::Node * size(void) const
loco::Node * input(void) const
loco::Node * begin(void) const
SPACE_TO_BATCH_ND in Circle.
loco::Node * block_shape(void) const
loco::Node * input(void) const
loco::Node * paddings(void) const
SPACE_TO_DEPTH in Circle.
int32_t block_size(void) const
loco::Node * input(void) const
SPARSE_TO_DENSE in Circle.
loco::Node * output_shape(void) const
Virtual CIRCLESPLITOUT in Circle.
loco::Node * input(void) const
Virtual CIRCLESPLITVOUT in Circle.
int32_t index(void) const
loco::Node * input(void) const
SQUARED_DIFFERENCE in Circle.
loco::Node * input(void) const
const std::vector< int32_t > & squeeze_dims() const
loco::Node * input(void) const
loco::Node * multiples(void) const
Virtual CIRCLETOPKV2OUT in Circle.
loco::Node * input(void) const
TRANSPOSE_CONV in Circle.
loco::Node * inputSizes(void) const
loco::Node * a(void) const
loco::Node * perm(void) const
UNIDIRECTIONAL_SEQUENCE_LSTM in Circle.
loco::Node * recurrent_to_output_weights(void) const
loco::Node * input(void) const
loco::Node * input(void) const
Virtual CIRCLEUNIQUEOUT in Circle.
int32_t index(void) const
loco::Node * input(void) const
loco::Node * value(void) const
Virtual CIRCLEUNPACKOUT in Circle.
loco::Node * input(void) const
Virtual CircleVariable in Circle for 'variable' Tensor.
Virtual CIRCLEWHILEOUT in Circle.
loco::Node * input(void) const
int32_t index(void) const
uint32_t arity(void) const final
uint32_t arity(void) const final
const luci_interpreter::RuntimeShape output_shape
#define LUCI_ASSERT(condition, msg)
#define VERBOSE(name, lv)
#define DECLARE_USE_SINGLE(NAME)
std::vector< Node * > input_nodes(const Graph *)
DataType
"scalar" value type
loco::TensorShape broadcast_shape(const loco::TensorShape &x, const loco::TensorShape &y)
loco::NodeShape shape_get(const loco::Node *node)
loco::NodeShape node_shape(const loco::Node *node)
loco::GraphInputIndex index(const TFPlaceholder *node)
std::vector< std::string > split(const std::string &s, char delim)
NodeName name(void) const
uint32_t opnum(void) const final
virtual T visit(const CircleNode *)
Default fallback.
bool recognize(const loco::Dialect *) const final
Return true if this rule recognizes a given dialect.
bool infer(const loco::Node *, loco::NodeShape &) const final
Infer node's shape.