349 """Generate subgraph of one direction of unrolled RNN layer
352 transformer (_ModelTransformerHelper): helper for model generation
353 X (list of str): names of input tensors in sequence. Tensor shapes: [batch_size, input_size].
354 W (str): name of weight tensor
355 R (str): name of recurrence weight tensor
356 B (str): name of bias tensor
357 initial_h (str or None): name of tensor containing initial hidden state. Shape [batch_size, hidden_size]
358 clip (float or None): range which clips input of activations
359 act (str): activation function
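    # The loop below computes the standard ONNX RNN cell once per input frame:
    #
    #   Ht = f(Xt*(W^T) + Ht-1*(R^T) + Wb + Rb)
    #
    # where f is the activation named by `activation_name` and B = Wb + Rb is
    # the combined bias that is folded into a single Gemm per step.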
    seq_length = len(X)
    first_iter = 0
    state_tensors = []
    if initial_h is not None:
        previous_state_tensor = initial_h
    else:
        # No initial state given: compute the first step without the recurrence
        # term and start the main loop from the second frame.
        first_iter = 1
        state_tensor = transformer.make_gemm(X[0], W, B, trans_b=True)
        if clip is not None:
            state_tensor = transformer.make_clip(state_tensor, min=-clip, max=clip)
        previous_state_tensor = transformer.make_act(state_tensor, activation_name)
        state_tensors += [previous_state_tensor]

    for i in range(first_iter, seq_length):
        state_tensor = transformer.make_gemm(X[i], W, B, trans_b=True)
        state_tensor = transformer.make_gemm(previous_state_tensor, R, state_tensor,
                                             trans_b=True)
        if clip is not None:
            state_tensor = transformer.make_clip(state_tensor, min=-clip, max=clip)
        previous_state_tensor = transformer.make_act(state_tensor, activation_name)
        state_tensors += [previous_state_tensor]
    return state_tensors


def _transform_unidirectional_RNN(transformer, original_node, x, tensor_infos, activation,
                                  clip, direction, hidden_size, layout):
    """Generate Simple (forward or reverse) unrolled RNN

    Args:
        transformer (_ModelTransformerHelper): transformation helper
        original_node (onnx.onnx_ml_pb2.NodeProto): unidirectional RNN operation to unroll
        x (list of str): list of input tensors (input tensor split along "time" dimension)
        tensor_infos (dict from str to _TensorInfo): maps tensor name to its shape and dtype info
        activation (str): name of activation function
        clip (float or None): range to which the inputs of the activations are clipped
        direction (str): "forward" or "reverse"
        hidden_size (int): size of hidden state
        layout (int): see attribute description:
            https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Operators.md#attributes-56
    """
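    # With the default layout=0, the outputs produced below follow the ONNX RNN
    # spec: Y has shape [seq_length, num_directions, batch_size, hidden_size] and
    # Y_h has shape [num_directions, batch_size, hidden_size]; layout=1 moves the
    # batch dimension to the front, which is why the unsqueeze axes are computed
    # from `layout`.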
    inputs = original_node.input
    outputs = original_node.output
    if direction == 'reverse':
        x.reverse()
    w = transformer.make_squeeze(inputs[1], axes=[0])
    r = transformer.make_squeeze(inputs[2], axes=[0])
    if len(inputs) > 3 and inputs[3] != '':
        raw_bias_tensor = transformer.make_squeeze(inputs[3], axes=[0])
        splitted_bias_tensors = transformer.make_split(raw_bias_tensor,
                                                       split_sizes=[hidden_size] * 2,
                                                       axis=0)
        # B concatenates Wb and Rb; they are always summed, so fold them into one.
        b = transformer.make_add(splitted_bias_tensors[0], splitted_bias_tensors[1])
    else:
        # _dtype_to_np (defined earlier in this file) maps the ONNX dtype to a numpy dtype.
        data_type = _dtype_to_np(tensor_infos[inputs[2]].dtype)
        b = transformer.make_constant_tensor(np.zeros(hidden_size, dtype=data_type),
                                             'zero_bias')
    initial_h = None
    if len(inputs) > 5 and inputs[5] != '':
        direction_dim = layout
        initial_h = transformer.make_squeeze(inputs[5], axes=[direction_dim])

    state_tensors = _generate_one_direction_RNN(transformer, x, w, r, b, initial_h,
                                                clip, activation)

    y_direction_dim = layout + 1
    y_h_direction_dim = layout
    state_layout_tensors = []
    seq_length_dim = layout
    for state in state_tensors:
        state_layout_tensors += [
            transformer.make_unsqueeze(state, axes=[seq_length_dim, y_direction_dim])
        ]

    # Y_h is the hidden state after the last processed frame.
    Y_h = outputs[1]
    transformer.make_node('Unsqueeze', [state_tensors[-1]], [Y_h],
                          axes=[y_h_direction_dim])
    if direction == 'reverse':
        state_layout_tensors.reverse()
    Y = outputs[0]
    transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim)


def _transform_bidirectional_RNN(transformer, original_node, x, tensor_infos, activations,
                                 clip, hidden_size, layout):
    """Generate Bidirectional unrolled RNN

    Args:
        transformer (_ModelTransformerHelper): transformation helper
        original_node (onnx.onnx_ml_pb2.NodeProto): bidirectional RNN operation to unroll
        x (list of str): list of input tensors (input tensor split along "time" dimension)
        tensor_infos (dict from str to _TensorInfo): maps tensor name to its shape and dtype info
        activations (list of str): list of length 2 containing names of forward and reverse activations
        clip (float or None): range to which the inputs of the activations are clipped
        hidden_size (int): size of hidden state
        layout (int): see attribute description:
            https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Operators.md#attributes-56
    """
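    # Per the ONNX spec, bidirectional inputs stack the two directions along
    # axis 0: W and R have shape [2, hidden_size, ...] and B has shape
    # [2, 2*hidden_size] (Wb followed by Rb), so each is split in two below and
    # the leading direction axis is squeezed away.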
    inputs = original_node.input
    outputs = original_node.output
    w_bi = transformer.make_split(inputs[1], split_sizes=[1, 1], axis=0)
    r_bi = transformer.make_split(inputs[2], split_sizes=[1, 1], axis=0)
    w = []
    r = []
    for d in range(2):
        w += [transformer.make_squeeze(w_bi[d], axes=[0])]
        r += [transformer.make_squeeze(r_bi[d], axes=[0])]

    b = []
    if len(inputs) > 3 and inputs[3] != '':
        raw_bias_tensors = transformer.make_split(inputs[3], split_sizes=[1, 1], axis=0)
        for d in range(2):
            raw_bias_tensors_squeezed = transformer.make_squeeze(raw_bias_tensors[d],
                                                                 axes=[0])
            splitted_bias_tensors = transformer.make_split(raw_bias_tensors_squeezed,
                                                           split_sizes=[hidden_size] * 2,
                                                           axis=0)
            b += [
                transformer.make_add(splitted_bias_tensors[0], splitted_bias_tensors[1])
            ]
    else:
        data_type = _dtype_to_np(tensor_infos[inputs[2]].dtype)
        b = [
            transformer.make_constant_tensor(np.zeros(hidden_size, dtype=data_type),
                                             'zero_bias')
        ] * 2

    initial_h = [None, None]
    if len(inputs) > 5 and inputs[5] != '':
        direction_dim = layout
        initial_h = transformer.make_split(inputs[5],
                                           split_sizes=[1, 1],
                                           axis=direction_dim)
        for d in range(2):
            initial_h[d] = transformer.make_squeeze(initial_h[d], axes=[direction_dim])

    state_f_tensors = _generate_one_direction_RNN(transformer, x, w[0], r[0], b[0],
                                                  initial_h[0], clip, activations[0])
    x.reverse()
    state_b_tensors = _generate_one_direction_RNN(transformer, x, w[1], r[1], b[1],
                                                  initial_h[1], clip, activations[1])
    # Bring the backward states back into original time order.
    state_b_tensors.reverse()

    y_direction_dim = layout + 1
    y_h_direction_dim = layout
    state_layout_tensors = []
    seq_length_dim = layout
    seq_length = len(x)
    for t in range(seq_length):
        state_f = state_f_tensors[t]
        state_b = state_b_tensors[t]
        state_layout_tensors_f = transformer.make_unsqueeze(
            state_f, axes=[seq_length_dim, y_direction_dim])
        state_layout_tensors_b = transformer.make_unsqueeze(
            state_b, axes=[seq_length_dim, y_direction_dim])
        state_layout_tensors += [
            transformer.make_concat([state_layout_tensors_f, state_layout_tensors_b],
                                    axis=y_direction_dim)
        ]

    # Y_h holds the final state of each direction: the forward pass ends at the
    # last frame, the backward pass (already re-reversed) ends at the first.
    last_f_state_layout_tensor = transformer.make_unsqueeze(state_f_tensors[-1],
                                                            axes=[y_h_direction_dim])
    last_b_state_layout_tensor = transformer.make_unsqueeze(state_b_tensors[0],
                                                            axes=[y_h_direction_dim])

    Y_h = outputs[1]
    transformer.make_node('Concat',
                          [last_f_state_layout_tensor, last_b_state_layout_tensor], [Y_h],
                          axis=y_h_direction_dim)

    Y = outputs[0]
    transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim)
553 """Unroll RNN operation
556 transformer (_ModelTransformerHelper): transformation helper
557 tensor_infos (dict from str to _TensorInfo): dict maps tensor name to it's shape and dtype info
558 node (onnx.onnx_ml_pb2.NodeProto): RNN operation to unroll
    inputs = node.input
    if len(inputs) > 4 and inputs[4] != '':
        raise NotImplementedError('Variadic length of output is not supported')

    # Attribute defaults follow the ONNX RNN spec.
    activation_alpha = []
    activation_beta = []
    activations = ['Tanh', 'Tanh']
    clip = None
    direction = 'forward'
    hidden_size = 0
    layout = 0

    for attr in node.attribute:
        if attr.name == 'activation_alpha':
            activation_alpha = attr.floats
        if attr.name == 'activation_beta':
            activation_beta = attr.floats
        if attr.name == 'activations':
            activations = list(map(lambda item: item.decode('UTF-8'), list(attr.strings)))
        if attr.name == 'clip':
            clip = attr.f
        if attr.name == 'direction':
            direction = attr.s.decode('UTF-8')
        if attr.name == 'hidden_size':
            hidden_size = attr.i
        if attr.name == 'layout':
            layout = attr.i

    if len(activation_alpha) > 0 or len(activation_beta) > 0:
        raise NotImplementedError('Unsupported parameters for RNN activations')

    for act in activations:
        if act not in ['Relu', 'Tanh', 'Sigmoid']:
            raise NotImplementedError('Unsupported activation function')

    seq_length_dim = layout
    seq_length = tensor_infos[inputs[0]].shape[seq_length_dim]
    if hidden_size == 0:
        hidden_size = tensor_infos[inputs[2]].shape[2]

    input_split_tensor = transformer.make_split(inputs[0],
                                                split_sizes=[1] * seq_length,
                                                axis=seq_length_dim)
    x = []
    for i in range(len(input_split_tensor)):
        input_frame_tensor = input_split_tensor[i]
        squeezed_frame_tensor = transformer.make_squeeze(input_frame_tensor, axes=[0])
        x += [squeezed_frame_tensor]

    if direction in ['forward', 'reverse']:
        _transform_unidirectional_RNN(transformer, node, x, tensor_infos, activations[0],
                                      clip, direction, hidden_size, layout)
    elif direction == 'bidirectional':
        _transform_bidirectional_RNN(transformer, node, x, tensor_infos, activations,
                                     clip, hidden_size, layout)
    else:
        raise RuntimeError('Unknown RNN type')

    transformer.mark_for_deletion(node)
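
# Note: unrolling requires a statically known sequence length; _legalize_RNN and
# _legalize_LSTM read it from the input tensor's shape, so models with a dynamic
# sequence length cannot be legalized this way.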

def _generate_one_direction_LSTM(transformer, X, W, R, B, initial_h, initial_c, P, clip,
                                 act, dtype, hidden_size, batch_size):
    """Generate subgraph for one direction of unrolled LSTM layer

    Args:
        transformer (_ModelTransformerHelper): helper for model generation
        X (list of str): names of tensors in input sequence. Each tensor shape: [batch_size, input_size]
        W (str): name of concatenated weight tensor: [input, output, forget, cell]
        R (str): name of concatenated recurrence weights tensor: [input, output, forget, cell]
        B (str or None): name of concatenated bias tensor: [input, output, forget, cell]
        initial_h (str or None): name of tensor containing initial hidden state. Shape: [batch_size, hidden_size]
        initial_c (str or None): name of tensor containing initial cell state. Shape: [batch_size, hidden_size]
        P (str or None): name of concatenated peephole tensor: [input, output, forget]
        clip (float or None): range to which the inputs of the activations are clipped
        act (dict of str): activation functions, for example {'f': 'Sigmoid', 'g': 'Tanh', 'h': 'Tanh'}
        dtype (numpy dtype): data type used in created LSTM operation
        hidden_size (int): hidden dimension
        batch_size (int): batch dimension
    """
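    # The unrolled loop below computes the standard ONNX LSTM cell per frame
    # ((.) denotes elementwise multiplication):
    #
    #   it = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Pi (.) Ct-1 + Wbi + Rbi)
    #   ft = f(Xt*(Wf^T) + Ht-1*(Rf^T) + Pf (.) Ct-1 + Wbf + Rbf)
    #   ct = g(Xt*(Wc^T) + Ht-1*(Rc^T) + Wbc + Rbc)
    #   Ct = ft (.) Ct-1 + it (.) ct
    #   ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Po (.) Ct + Wbo + Rbo)
    #   Ht = ot (.) h(Ct)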
    seq_length = len(X)
    state_h_tensors = []

    w_tensors = transformer.make_split(W, split_sizes=[hidden_size] * 4, axis=0)
    W = {'i': w_tensors[0], 'o': w_tensors[1], 'f': w_tensors[2], 'c': w_tensors[3]}

    r_tensors = transformer.make_split(R, split_sizes=[hidden_size] * 4, axis=0)
    R = {'i': r_tensors[0], 'o': r_tensors[1], 'f': r_tensors[2], 'c': r_tensors[3]}

    if B is not None:
        separate_b_tensors = transformer.make_split(B,
                                                    split_sizes=[hidden_size] * 8,
                                                    axis=0)
        b_tensors = []
        for i in range(4):
            # Fold Wb and Rb of each gate into a single bias.
            b_tensors += [
                transformer.make_add(separate_b_tensors[i], separate_b_tensors[i + 4])
            ]
    else:
        b_tensors = [
            transformer.make_constant_tensor(np.zeros((hidden_size), dtype=dtype),
                                             'zero_b')
        ] * 4
    B = {'i': b_tensors[0], 'o': b_tensors[1], 'f': b_tensors[2], 'c': b_tensors[3]}

    if initial_h is not None:
        previous_h_state_tensor = initial_h
    else:
        previous_h_state_tensor = transformer.make_constant_tensor(
            np.zeros((batch_size, hidden_size), dtype=dtype), 'initial_h')

    if initial_c is not None:
        previous_c_state_tensor = initial_c
    else:
        previous_c_state_tensor = transformer.make_constant_tensor(
            np.zeros((batch_size, hidden_size), dtype=dtype), 'initial_c')

    if P is not None:
        p_tensors = transformer.make_split(P, split_sizes=[hidden_size] * 3, axis=0)
        P = {'i': p_tensors[0], 'o': p_tensors[1], 'f': p_tensors[2]}
    else:
        # Zero peepholes turn the peephole terms into no-ops.
        zero = transformer.make_constant_tensor(np.zeros((hidden_size), dtype=dtype),
                                                'zero_peephole')
        P = {'i': zero, 'o': zero, 'f': zero}

    for i in range(seq_length):
        # it = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Pi (.) Ct-1 + Wbi + Rbi)
        it = transformer.make_gemm(X[i], W['i'], B['i'], trans_b=True)
        it = transformer.make_gemm(previous_h_state_tensor, R['i'], it, trans_b=True)
        peephole_it = transformer.make_mul(P['i'], previous_c_state_tensor)
        it = transformer.make_add(it, peephole_it)
        if clip is not None:
            it = transformer.make_clip(it, min=-clip, max=clip)
        it = transformer.make_act(it, act['f'])

        # ft = f(Xt*(Wf^T) + Ht-1*(Rf^T) + Pf (.) Ct-1 + Wbf + Rbf)
        ft = transformer.make_gemm(X[i], W['f'], B['f'], trans_b=True)
        ft = transformer.make_gemm(previous_h_state_tensor, R['f'], ft, trans_b=True)
        peephole_ft = transformer.make_mul(P['f'], previous_c_state_tensor)
        ft = transformer.make_add(ft, peephole_ft)
        if clip is not None:
            ft = transformer.make_clip(ft, min=-clip, max=clip)
        ft = transformer.make_act(ft, act['f'])

        # ct = g(Xt*(Wc^T) + Ht-1*(Rc^T) + Wbc + Rbc)
        ct = transformer.make_gemm(X[i], W['c'], B['c'], trans_b=True)
        ct = transformer.make_gemm(previous_h_state_tensor, R['c'], ct, trans_b=True)
        if clip is not None:
            ct = transformer.make_clip(ct, min=-clip, max=clip)
        ct = transformer.make_act(ct, act['g'])

        # Ct = ft (.) Ct-1 + it (.) ct
        ft_Ct = transformer.make_mul(ft, previous_c_state_tensor)
        it_ct = transformer.make_mul(it, ct)
        Ct = transformer.make_add(ft_Ct, it_ct)
        previous_c_state_tensor = Ct

        # ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Po (.) Ct + Wbo + Rbo)
        ot = transformer.make_gemm(X[i], W['o'], B['o'], trans_b=True)
        ot = transformer.make_gemm(previous_h_state_tensor, R['o'], ot, trans_b=True)
        peephole_ot = transformer.make_mul(P['o'], Ct)
        ot = transformer.make_add(ot, peephole_ot)
        if clip is not None:
            ot = transformer.make_clip(ot, min=-clip, max=clip)
        ot = transformer.make_act(ot, act['f'])

        # Ht = ot (.) h(Ct)
        Ht = transformer.make_act(Ct, act['h'])
        Ht = transformer.make_mul(ot, Ht)
        previous_h_state_tensor = Ht
        state_h_tensors += [Ht]

    return (state_h_tensors, previous_c_state_tensor)


def _transform_unidirectional_LSTM(transformer, original_node, x, tensor_infos,
                                   activations, clip, direction, hidden_size, layout):
    """Generate Simple (forward or reverse) unrolled LSTM

    Args:
        transformer (_ModelTransformerHelper): transformation helper
        original_node (onnx.onnx_ml_pb2.NodeProto): unidirectional LSTM operation to unroll
        x (list of str): list of input tensors (input tensor split along "time" dimension)
        tensor_infos (dict from str to _TensorInfo): maps tensor name to its shape and dtype info
        activations (list of str): list of length 3 containing names of activation functions
        clip (float or None): range to which the inputs of the activations are clipped
        direction (str): "forward" or "reverse"
        hidden_size (int): size of hidden state
        layout (int): see attribute description:
            https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Operators.md#attributes-37
    """
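    # Per the ONNX LSTM spec the per-direction inputs are packed along the gate
    # axis in [input, output, forget, cell] order: after squeezing the direction
    # axis, W is [4*hidden_size, input_size], R is [4*hidden_size, hidden_size],
    # B is [8*hidden_size] (Wb then Rb) and the optional peephole tensor P is
    # [3*hidden_size]; _generate_one_direction_LSTM splits them apart.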
    inputs = original_node.input
    outputs = original_node.output
    if direction == 'reverse':
        x.reverse()
    w = transformer.make_squeeze(inputs[1], axes=[0])
    r = transformer.make_squeeze(inputs[2], axes=[0])

    b = None
    if len(inputs) > 3 and inputs[3] != '':
        b = transformer.make_squeeze(inputs[3], axes=[0])

    initial_h = None
    if len(inputs) > 5 and inputs[5] != '':
        direction_dim = layout
        initial_h = transformer.make_squeeze(inputs[5], axes=[direction_dim])

    initial_c = None
    if len(inputs) > 6 and inputs[6] != '':
        direction_dim = layout
        initial_c = transformer.make_squeeze(inputs[6], axes=[direction_dim])

    p = None
    if len(inputs) > 7 and inputs[7] != '':
        p = transformer.make_squeeze(inputs[7], axes=[0])

    # _dtype_to_np (defined earlier in this file) maps the ONNX dtype to a numpy dtype.
    dtype = _dtype_to_np(tensor_infos[inputs[0]].dtype)
    batch_size = tensor_infos[inputs[0]].shape[1 - layout]

    act = {'f': activations[0], 'g': activations[1], 'h': activations[2]}

    state_h_tensors, state_c_tensor = _generate_one_direction_LSTM(
        transformer, x, w, r, b, initial_h, initial_c, p, clip, act, dtype, hidden_size,
        batch_size)

    y_direction_dim = layout + 1
    y_h_direction_dim = layout
    state_layout_tensors = []
    seq_length_dim = layout
    for h_state in state_h_tensors:
        state_layout_tensors += [
            transformer.make_unsqueeze(h_state, axes=[seq_length_dim, y_direction_dim])
        ]

    # Y_h and Y_c are the hidden/cell states after the last processed frame.
    Y_h = outputs[1]
    transformer.make_node('Unsqueeze', [state_h_tensors[-1]], [Y_h],
                          axes=[y_h_direction_dim])
    Y_c = outputs[2]
    transformer.make_node('Unsqueeze', [state_c_tensor], [Y_c], axes=[y_h_direction_dim])
    if direction == 'reverse':
        state_layout_tensors.reverse()
    Y = outputs[0]
    transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim)


def _transform_bidirectional_LSTM(transformer, original_node, x, tensor_infos,
                                  activations, clip, hidden_size, layout):
    """Generate Bidirectional unrolled LSTM

    Args:
        transformer (_ModelTransformerHelper): transformation helper
        original_node (onnx.onnx_ml_pb2.NodeProto): bidirectional LSTM operation to unroll
        x (list of str): list of input tensors (input tensor split along "time" dimension)
        tensor_infos (dict from str to _TensorInfo): maps tensor name to its shape and dtype info
        activations (list of str): list of length 6 containing names of forward and reverse activations
        clip (float or None): range to which the inputs of the activations are clipped
        hidden_size (int): size of hidden state
        layout (int): see attribute description:
            https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Operators.md#attributes-37
    """
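    # The first three entries of `activations` are the forward f, g, h
    # functions; the last three are their reverse-direction counterparts, so
    # they are split into two per-direction dicts below.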
    inputs = original_node.input
    outputs = original_node.output

    w = transformer.make_split(inputs[1], split_sizes=[1, 1], axis=0)
    r = transformer.make_split(inputs[2], split_sizes=[1, 1], axis=0)
    for d in range(2):
        w[d] = transformer.make_squeeze(w[d], axes=[0])
        r[d] = transformer.make_squeeze(r[d], axes=[0])

    b = [None, None]
    if len(inputs) > 3 and inputs[3] != '':
        b = transformer.make_split(inputs[3], split_sizes=[1, 1], axis=0)
        for d in range(2):
            b[d] = transformer.make_squeeze(b[d], axes=[0])

    initial_h = [None, None]
    if len(inputs) > 5 and inputs[5] != '':
        direction_dim = layout
        initial_h = transformer.make_split(inputs[5],
                                           split_sizes=[1, 1],
                                           axis=direction_dim)
        for d in range(2):
            initial_h[d] = transformer.make_squeeze(initial_h[d], axes=[direction_dim])

    initial_c = [None, None]
    if len(inputs) > 6 and inputs[6] != '':
        direction_dim = layout
        initial_c = transformer.make_split(inputs[6],
                                           split_sizes=[1, 1],
                                           axis=direction_dim)
        for d in range(2):
            initial_c[d] = transformer.make_squeeze(initial_c[d], axes=[direction_dim])

    p = [None, None]
    if len(inputs) > 7 and inputs[7] != '':
        p = transformer.make_split(inputs[7], split_sizes=[1, 1], axis=0)
        for d in range(2):
            p[d] = transformer.make_squeeze(p[d], axes=[0])

    dtype = _dtype_to_np(tensor_infos[inputs[0]].dtype)
    batch_size = tensor_infos[inputs[0]].shape[1 - layout]

    act = [{
        'f': activations[0],
        'g': activations[1],
        'h': activations[2]
    }, {
        'f': activations[3],
        'g': activations[4],
        'h': activations[5]
    }]

    state_f_h_tensors, state_f_c_tensor = _generate_one_direction_LSTM(
        transformer, x, w[0], r[0], b[0], initial_h[0], initial_c[0], p[0], clip, act[0],
        dtype, hidden_size, batch_size)
    x.reverse()
    state_b_h_tensors, state_b_c_tensor = _generate_one_direction_LSTM(
        transformer, x, w[1], r[1], b[1], initial_h[1], initial_c[1], p[1], clip, act[1],
        dtype, hidden_size, batch_size)
    # Bring the backward states back into original time order.
    state_b_h_tensors.reverse()

    y_direction_dim = layout + 1
    y_c_direction_dim = layout
    state_layout_tensors = []
    seq_length_dim = layout
    for f_h_state, b_h_state in zip(state_f_h_tensors, state_b_h_tensors):
        state_f_layout_tensors = transformer.make_unsqueeze(
            f_h_state, axes=[seq_length_dim, y_direction_dim])
        state_b_layout_tensors = transformer.make_unsqueeze(
            b_h_state, axes=[seq_length_dim, y_direction_dim])
        state_layout_tensors += [
            transformer.make_concat([state_f_layout_tensors, state_b_layout_tensors],
                                    axis=y_direction_dim)
        ]

    # Final hidden states: the forward pass ends at the last frame, the backward
    # pass (already re-reversed) ends at the first.
    last_f_state_layout_tensor = transformer.make_unsqueeze(state_f_h_tensors[-1],
                                                            axes=[y_c_direction_dim])
    last_b_state_layout_tensor = transformer.make_unsqueeze(state_b_h_tensors[0],
                                                            axes=[y_c_direction_dim])

    Y_h = outputs[1]
    transformer.make_node('Concat',
                          [last_f_state_layout_tensor, last_b_state_layout_tensor], [Y_h],
                          axis=y_c_direction_dim)

    Y_f_c = transformer.make_unsqueeze(state_f_c_tensor, axes=[y_c_direction_dim])
    Y_b_c = transformer.make_unsqueeze(state_b_c_tensor, axes=[y_c_direction_dim])
    Y_c = outputs[2]
    transformer.make_node('Concat', [Y_f_c, Y_b_c], [Y_c], axis=y_c_direction_dim)

    Y = outputs[0]
    transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim)
948 """Unroll LSTM operation
951 transformer (_ModelTransformerHelper): transformation helper
952 tensor_infos (dict from str to _TensorInfo): dict maps tensor name to it's shape and dtype info
953 node (onnx.onnx_ml_pb2.NodeProto): LSTM operation to unroll
    inputs = node.input
    if len(inputs) > 4 and inputs[4] != '':
        raise NotImplementedError('Variadic length of output is not supported')

    # Attribute defaults follow the ONNX LSTM spec.
    activation_alpha = []
    activation_beta = []
    activations = ['Sigmoid', 'Tanh', 'Tanh'] * 2
    clip = None
    direction = 'forward'
    hidden_size = 0
    input_forget = 0
    layout = 0

    for attr in node.attribute:
        if attr.name == 'activation_alpha':
            activation_alpha = attr.floats
        if attr.name == 'activation_beta':
            activation_beta = attr.floats
        if attr.name == 'activations':
            activations = list(map(lambda item: item.decode('UTF-8'), list(attr.strings)))
        if attr.name == 'clip':
            clip = attr.f
        if attr.name == 'direction':
            direction = attr.s.decode('UTF-8')
        if attr.name == 'hidden_size':
            hidden_size = attr.i
        if attr.name == 'input_forget':
            input_forget = attr.i
        if attr.name == 'layout':
            layout = attr.i

    if len(activation_alpha) > 0 or len(activation_beta) > 0:
        raise NotImplementedError('Unsupported parameters for LSTM activations')

    for act in activations:
        if act not in ['Relu', 'Tanh', 'Sigmoid']:
            raise NotImplementedError('Unsupported activation function')

    # input_forget == 1 couples the input and forget gates, which is not handled.
    if input_forget != 0:
        raise NotImplementedError('Unsupported input_forget attribute value')

    seq_length_dim = layout
    seq_length = tensor_infos[inputs[0]].shape[seq_length_dim]
    if hidden_size == 0:
        hidden_size = tensor_infos[inputs[2]].shape[2]

    input_split_tensor = transformer.make_split(inputs[0],
                                                split_sizes=[1] * seq_length,
                                                axis=seq_length_dim)
    x = []
    for i in range(len(input_split_tensor)):
        input_frame_tensor = input_split_tensor[i]
        squeezed_frame_tensor = transformer.make_squeeze(input_frame_tensor, axes=[0])
        x += [squeezed_frame_tensor]

    if direction in ['forward', 'reverse']:
        _transform_unidirectional_LSTM(transformer, node, x, tensor_infos, activations,
                                       clip, direction, hidden_size, layout)
    elif direction == 'bidirectional':
        _transform_bidirectional_LSTM(transformer, node, x, tensor_infos, activations,
                                      clip, hidden_size, layout)
    else:
        raise RuntimeError('Unknown LSTM type')

    transformer.mark_for_deletion(node)
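
# A minimal driver sketch (hypothetical; the actual entry point, helper
# construction and model loading live elsewhere in this file): iterate over the
# graph, legalize each recurrent node, then drop the originals.
#
#   model = onnx.load('model.onnx')
#   transformer = _ModelTransformerHelper(model)   # assumed constructor
#   tensor_infos = ...  # shape/dtype info gathered from the model
#   for node in model.graph.node:
#       if node.op_type == 'RNN':
#           _legalize_RNN(transformer, tensor_infos, node)
#       elif node.op_type == 'LSTM':
#           _legalize_LSTM(transformer, tensor_infos, node)
#   transformer.delete_marked_nodes()  # hypothetical cleanup call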