ONE - On-device Neural Engine
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
onert::exec::ExecTime Class Reference

#include <ExecTime.h>

Public Member Functions

 ExecTime (const std::vector< const backend::Backend * > &backends)
 
int64_t getOperationExecTime (const backend::Backend *backend, const std::string &operation, bool quant, uint32_t op_size) const
 Get the exec time of an operation for the given input size, or a linearly interpolated value based on size if there is no record for that size.
 
void updateOperationExecTime (const backend::Backend *backend, const std::string &operation, bool quant, uint32_t op_size, int64_t time)
 Update the exec time of the operation on a backend for the given input size, or add a new entry if none exists.
 
int64_t getPermuteTime (const backend::Backend *from_backend, const backend::Backend *to_backend, bool quant, uint32_t op_size) const
 Get the permute time from one backend to another.
 
void updatePermuteTime (const backend::Backend *from_backend, const backend::Backend *to_backend, bool quant, uint32_t op_size, int64_t time)
 Update permute time from one backend to another.
 
void storeOperationsExecTime () const
 Update metrics file with new data.
 

Static Public Member Functions

static int64_t getMax ()
 Get the max value of int32_t, represented as int64_t.
 

Static Public Attributes

static const int64_t NOT_FOUND = -1
 

Detailed Description

Definition at line 31 of file ExecTime.h.

Constructor & Destructor Documentation

◆ ExecTime()

onert::exec::ExecTime::ExecTime ( const std::vector< const backend::Backend * > &  backends)
inline explicit

Definition at line 34 of file ExecTime.h.

35 : _json(backends, _measurements)
36 {
37 }

Member Function Documentation

◆ getMax()

static int64_t onert::exec::ExecTime::getMax ( )
inline static

Get the max value of int32_t, represented as int64_t.

Returns
max value

Definition at line 91 of file ExecTime.h.

91{ return _MAX; }

Referenced by updateOperationExecTime().

◆ getOperationExecTime()

int64_t onert::exec::ExecTime::getOperationExecTime ( const backend::Backend * backend,
const std::string &  operation,
bool  quant,
uint32_t  op_size 
) const

Get the exec time of an operation for the given input size, or a linearly interpolated value based on size if there is no record for that size.

Parameters
[in] backend: id of a backend
[in] operation: name of an operation
[in] quant: whether the input type is quantized
[in] op_size: sum of the operation's flattened sizes of inputs and outputs
Returns
execution time for the given input size, or -1 (NOT_FOUND) if there are no records for the given parameters (backend, operation, quantization).

Definition at line 25 of file ExecTime.cc.

28{
29 auto found_backend = _measurements.find(backend);
30 if (found_backend == _measurements.end())
31 return NOT_FOUND; // no execution time for this backend
32
33 auto found_operation_with_type = found_backend->second.find(operation);
34 if (found_operation_with_type == found_backend->second.end())
35 // no execution time for this operation
36 return NOT_FOUND;
37
38 auto found_operation = found_operation_with_type->second.find(quant);
39 if (found_operation == found_operation_with_type->second.end())
40 // no execution time for this operation
41 return NOT_FOUND;
42
43 auto found_size = found_operation->second.find(op_size);
44 if (found_size != found_operation->second.end())
45 return found_size->second; // found execution time
46
47 // Try to interpolate
48 if (found_operation->second.size() < 2)
49 // not possible to do linear interpolation
50 return found_operation->second.begin()->second;
51
52 // if we reach here, then this means, that there is no record, that is equal to op_size
53 auto upper_bound = found_operation->second.upper_bound(op_size); // > op_size
54 auto lower_bound = upper_bound;
55
56 if (upper_bound == found_operation->second.end()) // all values <= op_size
57 {
58 upper_bound--;
59 lower_bound = upper_bound;
60 lower_bound--;
61 }
62 else if (upper_bound == found_operation->second.begin()) // all values > op_size
63 {
64 upper_bound++;
65 }
66 else // op_size between
67 {
68 lower_bound--;
69 }
70
71 // Linear interpolation
72 const auto x0 = static_cast<int64_t>(lower_bound->first); // size
73 const auto x1 = static_cast<int64_t>(upper_bound->first); // size
74 const int64_t y0 = lower_bound->second; // time
75 const int64_t y1 = upper_bound->second; // time
76 const auto x = static_cast<int64_t>(op_size);
77
78 int64_t interpolated_value = y0 + (x - x0) * (y1 - y0) / (x1 - x0);
79
80 // In some cases ops with smaller inputs is executed slower than the one
81 // with larger inputs, more likely because of a backend's load difference
82 if (interpolated_value < 0 && x > x1)
83 {
84 return y0;
85 }
86 // It must be non-positive ONLY if it's lesser than both of them
87 assert(interpolated_value > 0 || x < x0);
88
89 // execution time must be non-negative
90 return std::max<int64_t>(interpolated_value, 1);
91}
static const int64_t NOT_FOUND
Definition ExecTime.h:96

References NOT_FOUND.

Referenced by getPermuteTime().

◆ getPermuteTime()

int64_t onert::exec::ExecTime::getPermuteTime ( const backend::Backend * from_backend,
const backend::Backend * to_backend,
bool  quant,
uint32_t  op_size 
) const

Get the permute time from one backend to another.

Parameters
[in] from_backend
[in] to_backend
[in] quant: whether the input type is quantized
[in] op_size: sum of the operation's flattened sizes of inputs and outputs
Returns
permutation time for operation size

Definition at line 125 of file ExecTime.cc.

128{
129 return getOperationExecTime(from_backend, to_backend->config()->id(), quant, op_size);
130}
int64_t getOperationExecTime(const backend::Backend *backend, const std::string &operation, bool quant, uint32_t op_size) const
Get exec time of an operation with input size or linearly interpolated value based on size if there i...
Definition ExecTime.cc:25

References onert::backend::Backend::config(), and getOperationExecTime().

◆ storeOperationsExecTime()

void onert::exec::ExecTime::storeOperationsExecTime ( ) const
inline

Update metrics file with new data.

Definition at line 95 of file ExecTime.h.

95{ _json.storeOperationsExecTime(); }
void storeOperationsExecTime() const
Update _measurement_file with new data.

References onert::exec::JSON::storeOperationsExecTime().

◆ updateOperationExecTime()

void onert::exec::ExecTime::updateOperationExecTime ( const backend::Backend * backend,
const std::string &  operation,
bool  quant,
uint32_t  op_size,
int64_t  time 
)

Update the exec time of the operation on a backend for the given input size, or add a new entry if none exists.

Parameters
[in] backend: id of a backend
[in] operation: name of an operation
[in] quant: whether the input type is quantized
[in] op_size: sum of the operation's flattened sizes of inputs and outputs
[in] time: real measured value

Definition at line 93 of file ExecTime.cc.

96{
97 // If the op is not implemented for some input, it should not be scheduled
98 const auto &recs = _measurements[backend][operation][quant];
99 if (time == getMax() ||
100 std::any_of(recs.begin(), recs.end(),
101 [](std::pair<const uint32_t, const int64_t> p) { return p.second == getMax(); }))
102 {
103 _measurements[backend][operation][quant].clear();
104 _measurements[backend][operation][quant].emplace(op_size, getMax());
105 }
106 else
107 {
108 auto it = _measurements[backend][operation][quant].emplace(op_size, time);
109 if (!it.second)
110 {
111 // affect of the last measurement is bigger than the previous ones:
112 // this prefers new metrics than older once, so will adapt backend changes
113 it.first->second = (it.first->second + time) / 2;
114 }
115 }
116}
static int64_t getMax()
Get the max value of int32_t in int64_t.
Definition ExecTime.h:91
Configuration p

References getMax(), and p.

Referenced by updatePermuteTime().

◆ updatePermuteTime()

void onert::exec::ExecTime::updatePermuteTime ( const backend::Backend * from_backend,
const backend::Backend * to_backend,
bool  quant,
uint32_t  op_size,
int64_t  time 
)

Update permute time from one backend to another.

Parameters
[in] from_backend
[in] to_backend
[in] quant: whether the input type is quantized
[in] time: measured permutation time
[in] op_size: sum of the operation's flattened sizes of inputs and outputs

Definition at line 118 of file ExecTime.cc.

121{
122 updateOperationExecTime(from_backend, to_backend->config()->id(), quant, op_size, time);
123}
void updateOperationExecTime(const backend::Backend *backend, const std::string &operation, bool quant, uint32_t op_size, int64_t time)
Update exec time of the operation on a backend with given input size or add new entity if there is no...
Definition ExecTime.cc:93

References onert::backend::Backend::config(), and updateOperationExecTime().

Field Documentation

◆ NOT_FOUND

const int64_t onert::exec::ExecTime::NOT_FOUND = -1
inline static

Definition at line 96 of file ExecTime.h.

Referenced by getOperationExecTime().


The documentation for this class was generated from the following files: