#include <BCast.h>

Collaboration diagram for nnfw::cker::BCastList< N >:

Public Types
typedef std::vector< int32_t >	Vec

Public Member Functions
	BCastList (const Vec(&x)[N], const bool fewer_dims_optimization=true, const bool return_flattened_batch_indices=false)

	~BCastList ()

bool	IsValid () const

bool	IsBroadcastingRequired () const

const Vec &	reshape (int i) const

const Vec &	bcast (int i) const

const Vec &	result_shape () const

const Vec &	output_shape () const

const Vec &	grad_reduce_idx (int i) const

int32_t	output_batch_size () const

const std::vector< int32_t > &	batch_indices (int i) const

Static Protected Member Functions
static void	Reverse (Vec *shape)

Protected Attributes
bool	valid_ = true

bool	broadcasting_required_ = true

Vec	reshape_ [N]

Vec	bcast_ [N]

Vec	result_

Vec	output_

Vec	grad_reduce_idx_ [N]

int32_t	output_batch_size_

std::vector< int32_t >	batch_indices_ [N]

Detailed Description

template<int N>
class nnfw::cker::BCastList< N >

Definition at line 69 of file BCast.h.

Member Typedef Documentation

◆ Vec

template<int N>

typedef std::vector<int32_t> nnfw::cker::BCastList< N >::Vec

Definition at line 76 of file BCast.h.

Constructor & Destructor Documentation

◆ BCastList()

template<int N>

nnfw::cker::BCastList< N >::BCastList	(	const Vec(&x)[N],
		const bool	fewer_dims_optimization = `true`,
		const bool	return_flattened_batch_indices = `false`
	)

explicit

Definition at line 133 of file BCast.h.

{
  // Computes how the N input shapes x[0..N-1] broadcast against each other.
  //
  // On return the following members are filled in:
  //   output_            - the broadcast output shape, one entry per dimension.
  //   result_            - the (possibly dimension-merged) shape used for the
  //                        intermediate computation.
  //   reshape_[i]        - shape input i should be reshaped to.
  //   bcast_[i]          - per-dimension replication factor for input i.
  //   grad_reduce_idx_[i]- original dimension indices where input i had a 1.
  //   output_batch_size_ - product of all entries of output_.
  //   valid_             - set to false (followed by an early return) as soon
  //                        as two inputs have different non-1 extents in the
  //                        same dimension.
  //   broadcasting_required_ - set to false when all input shapes are equal.
  //
  // Parameters:
  //   x                              - the N input shapes.
  //   fewer_dims_optimization        - when true, runs of adjacent dimensions
  //                                    with the same broadcasting pattern are
  //                                    merged, and identical shapes take a
  //                                    fast path that collapses to one dim.
  //   return_flattened_batch_indices - when true (and broadcasting is required
  //                                    and the output is non-empty),
  //                                    batch_indices_[i] is populated via
  //                                    ComputeBatchIndices.
  typedef BCastList::Vec Vec;
  bool all_equal = true;
  size_t largest_rank = 0;
  output_batch_size_ = 1;
  // Single pass over the inputs: detect whether every shape equals x[0] and
  // find the largest rank among them.
  for (int i = 0; i < N; ++i)
  {
    if (x[i] != x[0])
    {
      all_equal = false;
    }
    if (x[i].size() > largest_rank)
    {
      largest_rank = x[i].size();
    }
  }
  if (all_equal)
  {
    broadcasting_required_ = false;
  }
  if (all_equal && fewer_dims_optimization)
  {
    // Fast path for common case of identical shapes.
    // output_ keeps the full shape, while result_/reshape_ are collapsed to a
    // single dimension holding the total element count and bcast_ is all 1.
    int32_t elements = 1;
    const int rank = x[0].size();
    output_.resize(rank);
    for (int i = 0; i < rank; i++)
    {
      const int32_t dim = x[0][i];
      elements *= dim;
      output_[i] = dim;
    }
    result_.push_back(elements);
    output_batch_size_ = elements;
    for (int i = 0; i < N; ++i)
    {
      reshape_[i].push_back(elements);
      bcast_[i].push_back(1);
    }
    // grad_reduce_idx_ is left as empty
    return;
  }
 
  // Reverse all the shapes for convenience
  // After the reverse, 0-th is the inner-most dimension.
  Vec copy[N];
  for (int i = 0; i < N; ++i)
  {
    copy[i] = x[i];
    Reverse(&copy[i]);
  }
 
  // 1-extend and align all vectors.
  // Because the copies are reversed, appending 1s at the end corresponds to
  // adding leading (outer-most) dimensions of size 1 to the original shape.
  for (int i = 0; i < N; ++i)
  {
    if (copy[i].size() < largest_rank)
    {
      copy[i].resize(largest_rank, 1);
    }
  }
  // Going through each dimension starting from the inner-most
  // dimension, compares the dimensions of all inputs. They are compatible if
  // every extent that is not 1 has the same value.
 
  // Per-input flags: whether the j-th component of each input is 1, for the
  // current dimension and for the last dimension that was not all-ones.
  bool prev_is_one[N];
  bool current_is_one[N];
  for (int i = 0; i < N; ++i)
  {
    prev_is_one[i] = false;
    current_is_one[i] = false;
  }
  Vec output;
  bool output_dim_set = false;
  int output_dim = -1;
  bool none_is_one = true;
  // set_one becomes true once a dimension that is not all-ones has been
  // processed; the merge branch below requires it before touching .back().
  bool set_one = false;
  for (size_t j = 0; j < largest_rank; ++j)
  {
    output_dim = -1;
    output_dim_set = false;
    none_is_one = true;
    // Find which indices are 1.
    for (int i = 0; i < N; ++i)
    {
      // Keep track of which indices are 1.
      if (copy[i][j] == 1)
      {
        current_is_one[i] = true;
        none_is_one = false;
      }
      else
      {
        current_is_one[i] = false;
        // The first non-1 extent defines output_dim; every later non-1 extent
        // in this dimension must match it.
        if (!output_dim_set || copy[i][j] == output_dim)
        {
          output_dim = copy[i][j];
          output_dim_set = true;
        }
        else
        {
          // Incompatible shapes: mark invalid and stop. Members populated so
          // far are left as they are.
          valid_ = false;
          return;
        }
      }
    }
    output_.push_back(output_dim_set ? output_dim : 1);
    output_batch_size_ *= output_.back();
    // All dimensions are 1.
    if (!output_dim_set)
    {
      // Without the optimization every dimension is emitted, including
      // all-ones ones; with it, they are dropped from result_/reshape_/bcast_.
      if (!fewer_dims_optimization)
      {
        for (int i = 0; i < N; ++i)
        {
          bcast_[i].push_back(1);
          reshape_[i].push_back(1);
        }
        result_.push_back(1);
      }
      // largest_rank - 1 - j maps the reversed index j back to the index of
      // this dimension in the 1-extended, outer-most-first shape.
      for (int i = 0; i < N; ++i)
      {
        grad_reduce_idx_[i].push_back(largest_rank - 1 - j);
      }
      // This will skip updating the previous state to the current one. We'll
      // explain why this is safe below.
      // Consider the previous state P, current state C and the next state N.
      // In the case where N also is all ones (N == C), we'll do the same
      // optimization here (push back one dimensions if we need to), which is
      // safe and is expected.
      //
      // When N != C, we'll continue as usual. However, we might trigger the
      // next block if N == P (because we didn't update the previous state).
      // We trigger the next block if `fewer_dims_optimization` is true.
      // This means that we did not modify any broadcast / reshapes in this
      // block (we skipped updating, since the one dimensions can be ignored).
      // In essence, we only need to check whether the previous non-one state is
      // equal to the current non-one state.
 
      continue;
    }
    else if ((fewer_dims_optimization) &&
             std::equal(current_is_one, current_is_one + N, prev_is_one) && set_one)
    {
      // It is a run of the same broadcasting case as last time.
      // We can reshape the input so that fewer dimensions
      // are involved in the intermediate computation.
      // The current dimension is folded into the most recently emitted entry
      // by multiplying, instead of pushing a new entry.
      result_.back() *= output_dim;
      for (int i = 0; i < N; ++i)
      {
        reshape_[i].back() *= copy[i][j];
        bcast_[i].back() *= current_is_one[i] ? output_dim : 1;
        // Note: none_is_one is necessarily false whenever current_is_one[i]
        // is true, since both are set together in the scan above.
        if (current_is_one[i] && !none_is_one)
        {
          grad_reduce_idx_[i].push_back(largest_rank - 1 - j);
        }
      }
    }
    else
    {
      // Start a new entry: a different broadcasting pattern from the previous
      // emitted dimension, the first emitted dimension, or merging disabled.
      result_.push_back(output_dim);
      for (int i = 0; i < N; ++i)
      {
        reshape_[i].push_back(copy[i][j]);
        bcast_[i].push_back(current_is_one[i] ? output_dim : 1);
        if (current_is_one[i] && !none_is_one)
        {
          grad_reduce_idx_[i].push_back(largest_rank - 1 - j);
        }
      }
    }
    // Remember this dimension's pattern for the run detection on the next
    // iteration (skipped for all-ones dimensions by the `continue` above).
    set_one = true;
    for (int i = 0; i < N; ++i)
    {
      prev_is_one[i] = current_is_one[i];
    }
  }
  // Nothing was emitted (e.g. every dimension was all-ones and was skipped, or
  // the largest rank is 0): fall back to a single dimension of size 1.
  if (result_.empty())
  {
    result_.push_back(1);
    for (int i = 0; i < N; ++i)
    {
      reshape_[i].push_back(1);
      bcast_[i].push_back(1);
    }
  }
  // Undo the initial reversal so that every vector is again ordered with the
  // outer-most dimension first.
  for (int i = 0; i < N; ++i)
  {
    Reverse(&reshape_[i]);
    Reverse(&bcast_[i]);
    Reverse(&grad_reduce_idx_[i]);
  }
  Reverse(&result_);
  Reverse(&output_);
  // Only compute batch indices when we need broadcasting, and we aren't doing
  // needless work (when the output size is 0 or the
  // return_flattened_batch_indices isn't enabled).
  // NOTE(review): ComputeBatchIndices is defined outside this view; presumably
  // it fills batch_indices_[i] with one flattened index per output batch
  // element - confirm against its definition.
  if (return_flattened_batch_indices && broadcasting_required_ && output_batch_size_ > 0)
  {
    for (int i = 0; i < N; ++i)
    {
      ComputeBatchIndices(output_batch_size_, reshape_[i], bcast_[i], &batch_indices_[i]);
    }
  }
}