19 def __init__(self, nnpackage_path, backends="train"):
21 Initialize the train session.
23 nnpackage_path (str): Path to the nnpackage file or directory.
24 backends (str): Backends to use, default is "train".
26 load_start = time.perf_counter()
28 libnnfw_api_pybind.experimental.nnfw_session(nnpackage_path, backends))
29 load_end = time.perf_counter()
30 self.
total_time = {
'MODEL_LOAD': (load_end - load_start) * 1000}
def compile(self, optimizer, loss, metrics=None, batch_size=16):
    """
    Compile the session with optimizer, loss, and metrics.

    String arguments are resolved to instances through the corresponding
    registry; instances are used as given. The resolved objects are stored
    on the session, the training information is pushed to the native
    session, and the session is prepared for training. The time spent in
    the prepare step is recorded in ``self.total_time["COMPILE"]`` in
    milliseconds.

    Args:
        optimizer (Optimizer): Optimizer instance or str.
        loss (Loss): Loss instance or str.
        metrics (list): List of metrics (Metric instances or str) to
            evaluate during training. ``None`` or an empty list means no
            metrics.
        batch_size (int): Number of samples per batch.

    Raises:
        TypeError: If a metric does not inherit from the Metric base class.
        ValueError: If the number of metrics does not match the number of model outputs.
    """
    # Resolve registry names up front. Everything below must use the
    # resolved objects: the raw arguments may be plain strings, which have
    # no `learning_rate` / `reduction` / `nums_trainable_ops` attributes.
    resolved_optimizer = (OptimizerRegistry.create_optimizer(optimizer)
                          if isinstance(optimizer, str) else optimizer)
    resolved_loss = (LossRegistry.create_loss(loss)
                     if isinstance(loss, str) else loss)
    resolved_metrics = [
        MetricsRegistry.create_metric(m) if isinstance(m, str) else m
        for m in (metrics or [])
    ]

    # Validate that every metric is a Metric instance.
    for metric in resolved_metrics:
        if not isinstance(metric, Metric):
            raise TypeError(f"Invalid metric type: {type(metric).__name__}. "
                            "All metrics must inherit from the Metric base class.")

    # An empty metric list is accepted; otherwise one metric per model output.
    num_model_outputs = self.session.output_size()
    if resolved_metrics and len(resolved_metrics) != num_model_outputs:
        raise ValueError(
            f"Number of metrics ({len(resolved_metrics)}) does not match the number of model outputs ({num_model_outputs}). "
            "Please ensure one metric is provided for each model output.")

    # Store the resolved objects only after validation, so a rejected
    # metric list does not overwrite the previously stored configuration.
    self.optimizer = resolved_optimizer
    self.loss = resolved_loss
    self.metrics = resolved_metrics

    # Set training information.
    self.train_info.learning_rate = resolved_optimizer.learning_rate
    # NOTE(review): assumes the train-info object exposes a `batch_size`
    # field and that this is where `batch_size` is meant to be forwarded;
    # confirm against the native train-info binding.
    self.train_info.batch_size = batch_size
    self.train_info.loss_info.loss = LossRegistry.map_loss_function_to_enum(
        resolved_loss)
    self.train_info.loss_info.reduction_type = resolved_loss.reduction
    self.train_info.opt = OptimizerRegistry.map_optimizer_to_enum(
        resolved_optimizer)
    self.train_info.num_of_trainable_ops = resolved_optimizer.nums_trainable_ops
    self.session.train_set_traininfo(self.train_info)

    # Prepare the session for training and record how long it took (ms).
    compile_start = time.perf_counter()
    self.session.train_prepare()
    compile_end = time.perf_counter()
    self.total_time["COMPILE"] = (compile_end - compile_start) * 1000
111 def train(self, data_loader, epochs, validation_split=0.0, checkpoint_path=None):
113 Train the model using the given data loader.
115 data_loader: A data loader providing input and expected data.
116 batch_size (int): Number of samples per batch.
117 epochs (int): Number of epochs to train.
118 validation_split (float): Ratio of validation data. Default is 0.0 (no validation).
119 checkpoint_path (str): Path to save or load the training checkpoint.
123 "The training session is not properly configured. "
124 "Please call `compile(optimizer, loss)` before calling `train()`.")
127 train_data, val_data = data_loader.split(validation_split)
133 for epoch
in range(epochs):
134 message = [f
"Epoch {epoch + 1}/{epochs}"]
136 epoch_start_time = time.perf_counter()
138 train_loss, avg_io_time, avg_train_time = self.
_run_phase(train_data,
140 message.append(f
"Train time: {avg_train_time:.3f}ms/step")
141 message.append(f
"IO time: {avg_io_time:.3f}ms/step")
142 message.append(f
"Train Loss: {train_loss:.4f}")
145 if validation_split > 0.0:
146 val_loss, _, _ = self.
_run_phase(val_data, train=
False)
147 message.append(f
"Validation Loss: {val_loss:.4f}")
151 message.append(f
"{metric.__class__.__name__}: {metric.result():.4f}")
154 epoch_time = (time.perf_counter() - epoch_start_time) * 1000
155 epoch_times.append(epoch_time)
157 print(
" - ".join(message))
160 if checkpoint_path
is not None:
161 self.session.train_export_checkpoint(checkpoint_path)
170 Run a training or validation phase.
172 data: Data generator providing input and expected data.
173 train (bool): Whether to perform training or validation.
175 float: Average loss for the phase.
183 for inputs, expecteds
in data:
188 data_type=
"expected")
190 set_io_start = time.perf_counter()
192 for i, input_data
in enumerate(inputs):
193 self.session.train_set_input(i, input_data)
197 for i, expected_data
in enumerate(expecteds):
198 expected = np.array(expected_data,
199 dtype=self.session.output_tensorinfo(i).dtype)
200 self.session.train_set_expected(i, expected)
202 output = np.zeros(expected.shape,
203 dtype=self.session.output_tensorinfo(i).dtype)
204 self.session.train_set_output(i, output)
205 assert i == len(outputs)
206 outputs.append(output)
208 set_io_end = time.perf_counter()
211 train_start = time.perf_counter()
212 self.session.
train(update_weights=train)
213 train_end = time.perf_counter()
217 self.session.train_get_loss(i)
for i
in range(len(expecteds)))
218 total_loss += batch_loss
224 metric.update_state(outputs, expecteds)
227 io_time += (set_io_end - set_io_start)
228 train_time += (train_end - train_start)
231 return (total_loss / num_batches, (io_time * 1000) / num_batches,
232 (train_time * 1000) / num_batches)
234 return (0.0, 0.0, 0.0)
255 Train the model for a single batch.
257 inputs (list of np.ndarray): List of input arrays for the batch.
258 expecteds (list of np.ndarray): List of expected output arrays for the batch.
260 dict: A dictionary containing loss and metrics values.
264 "The training session is not properly configured. "
265 "Please call `compile(optimizer, loss)` before calling `train_step()`.")
271 data_type=
"expected")
274 for i, input_data
in enumerate(inputs):
275 self.session.train_set_input(i, input_data)
279 for i, expected_data
in enumerate(expecteds):
280 self.session.train_set_expected(i, expected_data)
281 output = np.zeros(expected_data.shape,
282 dtype=self.session.output_tensorinfo(i).dtype)
283 self.session.train_set_output(i, output)
284 outputs.append(output)
287 train_start = time.perf_counter()
288 self.session.
train(update_weights=
True)
289 train_end = time.perf_counter()
292 losses = [self.session.train_get_loss(i)
for i
in range(len(expecteds))]
297 metric.update_state(outputs, expecteds)
298 metric_results[metric.__class__.__name__] = metric.result()
302 "metrics": metric_results,
303 "train_time": (train_end - train_start) * 1000