ONE - On-device Neural Engine
Loading...
Searching...
No Matches
onert::exec::ExecTime Class Reference

#include <ExecTime.h>

Public Member Functions

 ExecTime (const std::vector< const backend::Backend * > &backends)
 
int64_t getOperationExecTime (const backend::Backend *backend, const std::string &operation, bool quant, uint32_t op_size) const
 Get the exec time of an operation for the given input size, or a linearly interpolated value based on size if there is no record for that size.
 
void updateOperationExecTime (const backend::Backend *backend, const std::string &operation, bool quant, uint32_t op_size, int64_t time)
 Update the exec time of the operation on a backend for the given input size, or add a new entry if there is none.
 
int64_t getPermuteTime (const backend::Backend *from_backend, const backend::Backend *to_backend, bool quant, uint32_t op_size) const
 Get the permute time from one backend to another.
 
void updatePermuteTime (const backend::Backend *from_backend, const backend::Backend *to_backend, bool quant, uint32_t op_size, int64_t time)
 Update permute time from one backend to another.
 
void storeOperationsExecTime () const
 Update metrics file with new data.
 

Static Public Member Functions

static int64_t getMax ()
 Get the max value of int32_t in int64_t.
 

Static Public Attributes

static const int64_t NOT_FOUND = -1
 

Detailed Description

Definition at line 33 of file ExecTime.h.

Constructor & Destructor Documentation

◆ ExecTime()

onert::exec::ExecTime::ExecTime ( const std::vector< const backend::Backend * > &  backends)
inline explicit

Definition at line 36 of file ExecTime.h.

37 : _json(backends, _measurements)
38 {
39 }

Member Function Documentation

◆ getMax()

static int64_t onert::exec::ExecTime::getMax ( )
inline static

Get the max value of int32_t in int64_t.

Returns
max value

Definition at line 93 of file ExecTime.h.

93{ return _MAX; }

Referenced by updateOperationExecTime().

◆ getOperationExecTime()

int64_t onert::exec::ExecTime::getOperationExecTime ( const backend::Backend * backend,
const std::string &  operation,
bool  quant,
uint32_t  op_size 
) const

Get the exec time of an operation for the given input size, or a linearly interpolated value based on size if there is no record for that size.

Parameters
[in] backend: id of a backend
[in] operation: name of an operation
[in] quant: whether the input type is quantized
[in] op_size: sum of the operation's flattened input and output sizes
Returns
execution time for the given input size, or -1 if there are no records for the given parameters (backend, op, quantization).

Definition at line 27 of file ExecTime.cc.

30{
31 auto found_backend = _measurements.find(backend);
32 if (found_backend == _measurements.end())
33 return NOT_FOUND; // no execution time for this backend
34
35 auto found_operation_with_type = found_backend->second.find(operation);
36 if (found_operation_with_type == found_backend->second.end())
37 // no execution time for this operation
38 return NOT_FOUND;
39
40 auto found_operation = found_operation_with_type->second.find(quant);
41 if (found_operation == found_operation_with_type->second.end())
42 // no execution time for this operation
43 return NOT_FOUND;
44
45 auto found_size = found_operation->second.find(op_size);
46 if (found_size != found_operation->second.end())
47 return found_size->second; // found execution time
48
49 // Try to interpolate
50 if (found_operation->second.size() < 2)
51 // not possible to do linear interpolation
52 return found_operation->second.begin()->second;
53
54 // if we reach here, then this means, that there is no record, that is equal to op_size
55 auto upper_bound = found_operation->second.upper_bound(op_size); // > op_size
56 auto lower_bound = upper_bound;
57
58 if (upper_bound == found_operation->second.end()) // all values <= op_size
59 {
60 upper_bound--;
61 lower_bound = upper_bound;
62 lower_bound--;
63 }
64 else if (upper_bound == found_operation->second.begin()) // all values > op_size
65 {
66 upper_bound++;
67 }
68 else // op_size between
69 {
70 lower_bound--;
71 }
72
73 // Linear interpolation
74 const auto x0 = static_cast<int64_t>(lower_bound->first); // size
75 const auto x1 = static_cast<int64_t>(upper_bound->first); // size
76 const int64_t y0 = lower_bound->second; // time
77 const int64_t y1 = upper_bound->second; // time
78 const auto x = static_cast<int64_t>(op_size);
79
80 int64_t interpolated_value = y0 + (x - x0) * (y1 - y0) / (x1 - x0);
81
82 // In some cases ops with smaller inputs is executed slower than the one
83 // with larger inputs, more likely because of a backend's load difference
84 if (interpolated_value < 0 && x > x1)
85 {
86 return y0;
87 }
88 // It must be non-positive ONLY if it's lesser than both of them
89 assert(interpolated_value > 0 || x < x0);
90
91 // execution time must be non-negative
92 return std::max<int64_t>(interpolated_value, 1);
93}
static const int64_t NOT_FOUND
Definition ExecTime.h:98

References NOT_FOUND.

Referenced by getPermuteTime().

◆ getPermuteTime()

int64_t onert::exec::ExecTime::getPermuteTime ( const backend::Backend * from_backend,
const backend::Backend * to_backend,
bool  quant,
uint32_t  op_size 
) const

Get the permute time from one backend to another.

Parameters
[in] from_backend
[in] to_backend
[in] quant: whether the input type is quantized
[in] op_size: sum of the operation's flattened input and output sizes
Returns
permutation time for operation size

Definition at line 127 of file ExecTime.cc.

130{
131 return getOperationExecTime(from_backend, to_backend->config()->id(), quant, op_size);
132}
int64_t getOperationExecTime(const backend::Backend *backend, const std::string &operation, bool quant, uint32_t op_size) const
Get exec time of an operation with input size or linearly interpolated value based on size if there i...
Definition ExecTime.cc:27

References onert::backend::Backend::config(), and getOperationExecTime().

◆ storeOperationsExecTime()

void onert::exec::ExecTime::storeOperationsExecTime ( ) const
inline

Update metrics file with new data.

Definition at line 97 of file ExecTime.h.

97{ _json.storeOperationsExecTime(); }
void storeOperationsExecTime() const
Update _measurement_file with new data.

References onert::exec::JSON::storeOperationsExecTime().

◆ updateOperationExecTime()

void onert::exec::ExecTime::updateOperationExecTime ( const backend::Backend * backend,
const std::string &  operation,
bool  quant,
uint32_t  op_size,
int64_t  time 
)

Update the exec time of the operation on a backend for the given input size, or add a new entry if there is none.

Parameters
[in] backend: id of a backend
[in] operation: name of an operation
[in] quant: whether the input type is quantized
[in] op_size: sum of the operation's flattened input and output sizes
[in] time: real measured value

Definition at line 95 of file ExecTime.cc.

98{
99 // If the op is not implemented for some input, it should not be scheduled
100 const auto &recs = _measurements[backend][operation][quant];
101 if (time == getMax() ||
102 std::any_of(recs.begin(), recs.end(),
103 [](std::pair<const uint32_t, const int64_t> p) { return p.second == getMax(); }))
104 {
105 _measurements[backend][operation][quant].clear();
106 _measurements[backend][operation][quant].emplace(op_size, getMax());
107 }
108 else
109 {
110 auto it = _measurements[backend][operation][quant].emplace(op_size, time);
111 if (!it.second)
112 {
113 // affect of the last measurement is bigger than the previous ones:
114 // this prefers new metrics than older once, so will adapt backend changes
115 it.first->second = (it.first->second + time) / 2;
116 }
117 }
118}
static int64_t getMax()
Get the max value of int32_t in int64_t.
Definition ExecTime.h:93

References getMax().

Referenced by updatePermuteTime().

◆ updatePermuteTime()

void onert::exec::ExecTime::updatePermuteTime ( const backend::Backend * from_backend,
const backend::Backend * to_backend,
bool  quant,
uint32_t  op_size,
int64_t  time 
)

Update permute time from one backend to another.

Parameters
[in] from_backend
[in] to_backend
[in] quant: whether the input type is quantized
[in] time: measured permutation time
[in] op_size: sum of the operation's flattened input and output sizes

Definition at line 120 of file ExecTime.cc.

123{
124 updateOperationExecTime(from_backend, to_backend->config()->id(), quant, op_size, time);
125}
void updateOperationExecTime(const backend::Backend *backend, const std::string &operation, bool quant, uint32_t op_size, int64_t time)
Update exec time of the operation on a backend with given input size or add new entity if there is no...
Definition ExecTime.cc:95

References onert::backend::Backend::config(), and updateOperationExecTime().

Field Documentation

◆ NOT_FOUND

const int64_t onert::exec::ExecTime::NOT_FOUND = -1
static

Definition at line 98 of file ExecTime.h.

Referenced by getOperationExecTime().


The documentation for this class was generated from the following files: