ONE - On-device Neural Engine
Loading...
Searching...
No Matches
FuseInstanceNormPass.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
18
19#include "Dialect/IR/TFLNodes.h"
21
23
24#include <cassert>
25#include <set>
26
27// Helper to find commutative node's arguments
28namespace
29{
30
/**
 * @brief Binds the two inputs of a commutative node to typed output slots.
 *
 * Given a node exposing x() and y(), tries to dynamic_cast them to the two
 * requested pointer types in either order (the node is commutative, so both
 * orders are acceptable).
 */
template <class ARG_TYPE_1, class ARG_TYPE_2> class NodeFiller final
{
public:
  NodeFiller(ARG_TYPE_1 **arg_1, ARG_TYPE_2 **arg_2) : _arg_1(arg_1), _arg_2(arg_2)
  {
    // DO NOTHING
  }

  /**
   * @brief Tries to fill (*_arg_1, *_arg_2) from node->x()/node->y()
   * @return true when both slots were matched; false otherwise (slots untouched)
   */
  template <class COMM_NODE> bool with_commutative_args_of(const COMM_NODE *node);

private:
  ARG_TYPE_1 **_arg_1;
  ARG_TYPE_2 **_arg_2;
};

/// @brief Factory helper so template arguments are deduced at the call site
template <class ARG_TYPE_1, class ARG_TYPE_2>
inline NodeFiller<ARG_TYPE_1, ARG_TYPE_2> fill(ARG_TYPE_1 **arg_1, ARG_TYPE_2 **arg_2)
{
  return NodeFiller<ARG_TYPE_1, ARG_TYPE_2>{arg_1, arg_2};
}

template <class ARG_TYPE_1, class ARG_TYPE_2>
template <class COMM_NODE>
bool NodeFiller<ARG_TYPE_1, ARG_TYPE_2>::with_commutative_args_of(const COMM_NODE *node)
{
  // First attempt: x is ARG_TYPE_1 and y is ARG_TYPE_2
  if (auto lhs = dynamic_cast<ARG_TYPE_1 *>(node->x()))
  {
    if (auto rhs = dynamic_cast<ARG_TYPE_2 *>(node->y()))
    {
      *_arg_1 = lhs;
      *_arg_2 = rhs;
      return true;
    }
  }

  // Second attempt: swapped order — x is ARG_TYPE_2 and y is ARG_TYPE_1
  if (auto swapped_x = dynamic_cast<ARG_TYPE_2 *>(node->x()))
  {
    if (auto swapped_y = dynamic_cast<ARG_TYPE_1 *>(node->y()))
    {
      *_arg_1 = swapped_y;
      *_arg_2 = swapped_x;
      return true;
    }
  }

  return false;
}
114
115} // namespace
116
117// Helper to check detail
118namespace
119{
120
122bool is_1D_with_dummy_dim(locoex::TFLConst *node, uint32_t depth)
123{
124 auto rank = node->rank();
125 uint32_t axis;
126 for (axis = 0; axis < rank - 1; ++axis)
127 {
128 if (node->dim(axis).value() != 1)
129 return false;
130 }
131 return node->dim(axis).value() == depth;
132}
133
134bool is_instance_mean(locoex::TFLMean *mean)
135{
136 //
137 // CHECK 1) input is rank 4
138 //
139 auto input = mean->input();
140 if (not loco::shape_known(input))
141 return false;
142 auto input_shape = loco::shape_get(input).as<loco::TensorShape>();
143 if (input_shape.rank() != 4)
144 return false;
145
146 //
147 // CHECK 2) 'reduction indices' is TFLConst of value [1,2], that is HW of NHWC
148 //
149 // TODO Support equivalent case, like [-3,-2]
150 // TODO Support non-Const case?
151 // TODO What if input is NCHW format in Circle?
152 auto red_indices = dynamic_cast<locoex::TFLConst *>(mean->reduction_indices());
153 if (not red_indices)
154 return false;
155 if (red_indices->rank() != 1)
156 return false;
157 std::set<int32_t> red_indices_set;
158 {
159 // TODO Currently only support S32, support other types
160 assert(red_indices->dtype() == loco::DataType::S32);
161 for (uint32_t i = 0; i < red_indices->dim(0).value(); ++i)
162 red_indices_set.insert(red_indices->at<loco::DataType::S32>(i));
163 }
164 if (red_indices_set.size() != 2)
165 return false;
166 if (red_indices_set.find(1) == red_indices_set.end())
167 return false;
168 if (red_indices_set.find(2) == red_indices_set.end())
169 return false;
170
171 //
172 // CHECK 3) keep_dims == true (?)
173 //
174 // We only have case of 'keep_dims == true' so far, but it might be okay with 'keep_dims == false'
175 // TODO Check this fact, and if true, return true regardless of keep_dims
176 return mean->keep_dims();
177}
178
179} // namespace
180
181// Helper to fuse Instance Norm
182namespace
183{
184
232class InstanceNormPattern final
233{
234public:
235 InstanceNormPattern(locoex::TFLAdd *candidate)
236 {
237 assert(candidate);
238 add_as_terminal = candidate;
239 }
240
241public:
242 bool matched();
243 bool matched() const { return _matched; }
244
245public:
246 // Context
247 loco::Node *ifm = nullptr;
248 locoex::TFLMean *mean_of_ifm = nullptr;
249 locoex::TFLSquaredDifference *sqdiff = nullptr;
250 locoex::TFLMean *mean_as_variance = nullptr;
251 locoex::TFLConst *const_as_epsilon = nullptr;
252 locoex::TFLAdd *add_as_variance = nullptr;
253 locoex::TFLRsqrt *rsqrt = nullptr;
254 locoex::TFLConst *const_as_gamma = nullptr;
255 locoex::TFLMul *mul_gamma = nullptr;
256 locoex::TFLMul *mul_as_scaled_ifm = nullptr;
257 locoex::TFLMul *mul_as_scaled_mean = nullptr;
258 locoex::TFLConst *const_as_beta = nullptr;
259 locoex::TFLSub *sub = nullptr;
260 locoex::TFLAdd *add_as_terminal = nullptr;
261
262private:
263 bool _matched = false;
264};
265
266bool InstanceNormPattern::matched()
267{
268 if (_matched)
269 return true;
270
271#define CHECK_OR_FALSE(condition) \
272 if (not(condition)) \
273 return false;
274
275 // Check order is DFS
276
277 CHECK_OR_FALSE(fill(&mul_as_scaled_ifm, &sub).with_commutative_args_of(add_as_terminal));
278 CHECK_OR_FALSE(fill(&ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_ifm));
279
281 auto ifm_shape = loco::shape_get(ifm);
282 CHECK_OR_FALSE(ifm_shape.domain() == loco::Domain::Tensor);
283 auto ifm_tensor_shape = ifm_shape.as<loco::TensorShape>();
284 CHECK_OR_FALSE(ifm_tensor_shape.rank() == 4);
285 uint32_t ifm_channel_depth = ifm_tensor_shape.dim(3).value();
286
287 CHECK_OR_FALSE(fill(&rsqrt, &const_as_gamma).with_commutative_args_of(mul_gamma));
288 CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_gamma, ifm_channel_depth));
289
290 add_as_variance = dynamic_cast<locoex::TFLAdd *>(rsqrt->x());
291 CHECK_OR_FALSE(add_as_variance);
292
294 fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
295
296 CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
297 // TODO Support regarding broadcast
298 CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1);
299
300 CHECK_OR_FALSE(is_instance_mean(mean_as_variance));
301 sqdiff = dynamic_cast<locoex::TFLSquaredDifference *>(mean_as_variance->input());
302 CHECK_OR_FALSE(sqdiff);
303
304 loco::Node *ifm_should_be = nullptr;
305 CHECK_OR_FALSE(fill(&ifm_should_be, &mean_of_ifm).with_commutative_args_of(sqdiff));
306 CHECK_OR_FALSE(ifm == ifm_should_be);
307 CHECK_OR_FALSE(is_instance_mean(mean_of_ifm));
308 CHECK_OR_FALSE(ifm == mean_of_ifm->input());
309
310 const_as_beta = dynamic_cast<locoex::TFLConst *>(sub->x());
311 CHECK_OR_FALSE(const_as_beta);
312 CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_beta, ifm_channel_depth));
313
314 mul_as_scaled_mean = dynamic_cast<locoex::TFLMul *>(sub->y());
315 CHECK_OR_FALSE(mul_as_scaled_mean);
316
317 locoex::TFLMul *mul_gamma_should_be = nullptr;
318 locoex::TFLMean *mean_of_ifm_should_be = nullptr;
319 CHECK_OR_FALSE(fill(&mul_gamma_should_be, &mean_of_ifm_should_be)
320 .with_commutative_args_of(mul_as_scaled_mean));
321 CHECK_OR_FALSE(mul_gamma == mul_gamma_should_be);
322 CHECK_OR_FALSE(mean_of_ifm == mean_of_ifm_should_be);
323#undef CHECK_OR_FALSE
324 _matched = true;
325 return true;
326}
327
343void fuse_instance_norm(const InstanceNormPattern &p)
344{
345 assert(p.matched());
346
347 auto graph = p.add_as_terminal->graph();
348
349 // Make reshape for gamma & beta
350 auto reshape_gamma = graph->nodes()->create<locoex::TFLReshape>();
351 auto reshape_beta = graph->nodes()->create<locoex::TFLReshape>();
352 {
353 auto ifm_shape = loco::shape_get(p.ifm).as<loco::TensorShape>();
354 uint32_t ifm_channel_depth = ifm_shape.dim(3).value();
355
356 int32_t new_shape[1] = {static_cast<int32_t>(ifm_channel_depth)};
357
358 reshape_gamma->tensor(p.const_as_gamma);
359 reshape_beta->tensor(p.const_as_beta);
360
361 locoex::set_new_shape(reshape_gamma, new_shape, 1);
362 locoex::set_new_shape(reshape_beta, new_shape, 1);
363 }
364
365 // Make Instance Norm to replace
366 auto instance_norm = graph->nodes()->create<locoex::CircleInstanceNorm>();
367 instance_norm->input(p.ifm);
368 instance_norm->gamma(reshape_gamma);
369 instance_norm->beta(reshape_beta);
370 float epsilon = p.const_as_epsilon->at<loco::DataType::FLOAT32>(0);
371 instance_norm->epsilon(epsilon);
372 instance_norm->fusedActivationFunction(p.add_as_terminal->fusedActivationFunction());
373
374 replace(p.add_as_terminal).with(instance_norm);
375}
376
377} // namespace
378
379namespace exo
380{
381
383{
384 bool changed = false;
385 for (auto node : loco::active_nodes(loco::output_nodes(g)))
386 {
387 auto add = dynamic_cast<locoex::TFLAdd *>(node);
388 if (not add)
389 continue;
390
391 InstanceNormPattern pattern(add);
392 if (not pattern.matched())
393 continue;
394
395 fuse_instance_norm(pattern);
396 changed = true;
397 }
398
399 return changed;
400}
401
402} // namespace exo
uint32_t value(void) const
Return the value.
Definition Dimension.h:51
A neural network graph.
Definition Graph.h:161
Logical unit of computation.
Definition Node.h:54
ShapeType as(void) const
void with(Node *into) const
Definition Node.cpp:66
const Dimension & dim(uint32_t axis) const
Definition TensorShape.h:38
INSTANCE_NORM in circle.
Definition CircleNodes.h:58
loco::Node * input(void) const
Definition CircleNodes.h:61
ADD in TensorFlow Lite.
Definition TFLNodes.h:116
Class to build tensor data.
Definition TFLNodes.h:198
loco::Node * input(void) const
Definition TFLNodes.h:356
bool keep_dims(void) const
Definition TFLNodes.h:363
loco::Node * reduction_indices(void) const
Definition TFLNodes.h:359
MUL in TensorFlow Lite.
Definition TFLNodes.h:375
loco::Node * tensor(void) const
Definition TFLNodes.h:410
SUB in TensorFlow Lite.
Definition TFLNodes.h:488
#define CHECK_OR_FALSE(condition)
bool is_1D_with_dummy_dim(locoex::TFLConst *node, uint32_t depth)
bool shape_known(const Node *node)
std::set< loco::Node * > active_nodes(const std::vector< loco::Node * > &roots)
Enumerate all the nodes required to compute "roots".
std::vector< Node * > output_nodes(Graph *)
Definition Graph.cpp:101
NodeShape shape_get(const Node *node)
Subst< SubstQualifier::Default > replace(Node *node)
Definition Node.cpp:82
void set_new_shape(locoex::TFLReshape *node, int32_t *base, uint32_t size)
Set both TFLReshape's 2nd input as TFLConst, and newShape attribute with same value.
Definition TFLNodes.cpp:67
NodeFiller< ARG_TYPE_1, ARG_TYPE_2 > fill(ARG_TYPE_1 **arg_1, ARG_TYPE_2 **arg_2)
Definition NodeFiller.h:72
bool run(loco::Graph *g) final
Run the pass.