
Commit d5307db

feat: Implement hyperparameter search with Optuna
Parent: c9b79e3

File tree: 3 files changed, +84 -21 lines


experiment/main.py

Lines changed: 42 additions & 2 deletions
@@ -16,6 +16,9 @@
 from sklearn.metrics import f1_score, precision_score, recall_score, log_loss, accuracy_score
 from sklearn.model_selection import train_test_split
 from sklearn.utils import compute_class_weight
+import optuna
+from optuna.samplers import TPESampler
+from optuna.pruners import HyperbandPruner

 from experiment.plot import save_plot
 from experiment.src.data_loader import read_detected_data, read_metadata, join_label, get_y_labels
@@ -25,6 +28,36 @@
 from experiment.src.prepare_data import prepare_train_data, data_checksum


+def objective(trial, x_train, y_train, x_test, y_test, hp, device):
+    # Sample each hyperparameter from its (low, high, step) search range.
+    params = {}
+    for param_name, ((low, high, step), default) in hp.items():
+        params[param_name] = trial.suggest_float(param_name, low, high, step=step)
+
+    model = MlModel(*[x.shape for x in x_train], params).to(device)
+    optimizer = optim.Adam(model.parameters(), lr=0.001)
+    criterion = nn.BCELoss()
+
+    dataset = TensorDataset(*[torch.tensor(x, dtype=torch.float32) for x in x_train],
+                            torch.tensor(y_train, dtype=torch.float32))
+    data_loader = DataLoader(dataset, batch_size=1024, shuffle=True)
+
+    model.train()
+    for _ in range(5):
+        for batch in data_loader:
+            x_tensors = [x.to(device) for x in batch[:-1]]
+            y_batch = batch[-1].to(device)
+            optimizer.zero_grad()
+            outputs = model(*x_tensors).squeeze()
+            loss = criterion(outputs, y_batch)
+            loss.backward()
+            optimizer.step()
+
+    # Evaluate on the held-out split. BCELoss expects tensors, with
+    # predictions first and targets second, so keep everything as tensors.
+    model.eval()
+    with torch.no_grad():
+        predictions = model(*[torch.tensor(x, dtype=torch.float32, device=device)
+                              for x in x_test]).squeeze()
+        val_loss = criterion(predictions, torch.tensor(y_test, dtype=torch.float32, device=device))
+    return val_loss.item()
+
+
 def evaluate_model(thresholds: dict,
                    model: nn.Module,
                    x_data: List[np.ndarray],
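Worth noting: HyperbandPruner only ever prunes a trial when the objective reports intermediate values and checks trial.should_prune(), which the objective above never does, so the pruner is effectively inert in this commit. A self-contained toy sketch of the report/prune protocol (the objective and its values are invented for illustration):

    import optuna

    def pruned_objective(trial: optuna.Trial) -> float:
        x = trial.suggest_float("x", -10.0, 10.0)
        loss = float("inf")
        for epoch in range(5):
            loss = (x - 2.0) ** 2 / (epoch + 1)  # stand-in for one real training epoch
            trial.report(loss, step=epoch)       # expose the intermediate loss to the pruner
            if trial.should_prune():             # Hyperband decides to cut the trial short
                raise optuna.TrialPruned()
        return loss

    study = optuna.create_study(sampler=optuna.samplers.TPESampler(),
                                pruner=optuna.pruners.HyperbandPruner(),
                                direction="minimize")
    study.optimize(pruned_objective, n_trials=20)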
@@ -169,9 +202,16 @@ def main(cred_data_location: str,
     x_train = [x_train_line, x_train_variable, x_train_value, x_train_features]
     x_test = [x_test_line, x_test_variable, x_test_value, x_test_features]

-    param_kwargs = {k: v[1] for k, v in hp_dict.items()}
+    if use_tuner:
+        print("Starting model training with hyperparameter optimization")
+        study = optuna.create_study(sampler=TPESampler(), pruner=HyperbandPruner(), direction="minimize")
+        study.optimize(lambda trial: objective(trial, x_train, y_train, x_test, y_test, hp_dict, device),
+                       n_trials=20)
+        param_kwargs = study.best_params
+        print(f"Best hyperparameters: {param_kwargs}")
+    else:
+        param_kwargs = {k: v[1] for k, v in hp_dict.items()}

-    print(f"Model is trained with params from dict:{param_kwargs}")
+    print(f"Model will be trained using the following params: {param_kwargs}")

     # repeat train step to obtain actual history chart
     ml_model = MlModel(x_full_line.shape, x_full_variable.shape, x_full_value.shape, x_full_features.shape,
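For context, objective() and the fallback branch above both unpack hp_dict entries as ((low, high, step), default). The actual dictionary is defined elsewhere in the repo; a hypothetical illustration of the expected layout:

    # Hypothetical hp_dict layout: {name: ((low, high, step), default)}.
    # Ranges here are invented for illustration; the 0.45 defaults match the
    # values previously hard-coded in lstm_model.py.
    hp_dict = {
        "value_lstm_dropout_rate": ((0.1, 0.6, 0.05), 0.45),
        "line_lstm_dropout_rate": ((0.1, 0.6, 0.05), 0.45),
        "variable_lstm_dropout_rate": ((0.1, 0.6, 0.05), 0.45),
        "dense_a_lstm_dropout_rate": ((0.1, 0.6, 0.05), 0.45),
        "dense_b_lstm_dropout_rate": ((0.1, 0.6, 0.05), 0.45),
    }

    # With use_tuner=False the fallback reduces to the defaults:
    param_kwargs = {k: v[1] for k, v in hp_dict.items()}  # every rate -> 0.45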

experiment/requirements.txt

Lines changed: 1 addition & 0 deletions
@@ -14,6 +14,7 @@ tensorrt==10.8.0.43
 tf2onnx==1.16.1
 wrapt==1.14.1
 torch==2.6.0
+optuna==4.2.1

 # version insensitive
 types-tensorflow

experiment/src/lstm_model.py

Lines changed: 41 additions & 19 deletions
@@ -9,31 +9,38 @@

 dtype = torch.float32

+
 class MlModel(nn.Module):

-    def __init__(
-            self,
-            line_shape: tuple,
-            variable_shape: tuple,
-            value_shape: tuple,
-            feature_shape: tuple,
-            hp=None,
-    ):
+    def __init__(self,
+                 line_shape: tuple,
+                 variable_shape: tuple,
+                 value_shape: tuple,
+                 feature_shape: tuple,
+                 hp=None):
         super(MlModel, self).__init__()
         if hp is None:
             hp = {}
-        value_lstm_dropout_rate = hp.get("value_lstm_dropout_rate", 0.45)
-        line_lstm_dropout_rate = hp.get("line_lstm_dropout_rate", 0.45)
-        variable_lstm_dropout_rate = hp.get("variable_lstm_dropout_rate", 0.45)
-        dense_a_dropout_rate = hp.get("dense_a_lstm_dropout_rate", 0.45)
-        dense_b_dropout_rate = hp.get("dense_b_lstm_dropout_rate", 0.45)
-        #print(f"Input hyperparameters: {hp}")
-        print(f"Run model with parameters: value_lstm_dropout_rate={value_lstm_dropout_rate}, line_lstm_dropout_rate={line_lstm_dropout_rate}, variable_lstm_dropout_rate={variable_lstm_dropout_rate}, dense_a_dropout_rate={dense_a_dropout_rate}, dense_b_dropout_rate={dense_b_dropout_rate}")
+        value_lstm_dropout_rate = self.__get_hyperparam("value_lstm_dropout_rate", hp)
+        line_lstm_dropout_rate = self.__get_hyperparam("line_lstm_dropout_rate", hp)
+        variable_lstm_dropout_rate = self.__get_hyperparam("variable_lstm_dropout_rate", hp)
+        dense_a_dropout_rate = self.__get_hyperparam("dense_a_lstm_dropout_rate", hp)
+        dense_b_dropout_rate = self.__get_hyperparam("dense_b_lstm_dropout_rate", hp)
+
         self.d_type = torch.float32

-        self.line_lstm = nn.LSTM(input_size=line_shape[2], hidden_size=line_shape[1], batch_first=True, bidirectional=True)
-        self.variable_lstm = nn.LSTM(input_size=variable_shape[2], hidden_size=variable_shape[1], batch_first=True, bidirectional=True)
-        self.value_lstm = nn.LSTM(input_size=value_shape[2], hidden_size=value_shape[1], batch_first=True, bidirectional=True)
+        self.line_lstm = nn.LSTM(input_size=line_shape[2],
+                                 hidden_size=line_shape[1],
+                                 batch_first=True,
+                                 bidirectional=True)
+        self.variable_lstm = nn.LSTM(input_size=variable_shape[2],
+                                     hidden_size=variable_shape[1],
+                                     batch_first=True,
+                                     bidirectional=True)
+        self.value_lstm = nn.LSTM(input_size=value_shape[2],
+                                  hidden_size=value_shape[1],
+                                  batch_first=True,
+                                  bidirectional=True)

         self.line_dropout = nn.Dropout(line_lstm_dropout_rate)
         self.variable_dropout = nn.Dropout(variable_lstm_dropout_rate)
@@ -48,7 +55,22 @@ def __init__(
         self.a_dropout = nn.Dropout(dense_a_dropout_rate)
         self.b_dropout = nn.Dropout(dense_b_dropout_rate)

-    def forward(self, line_input, variable_input, value_input, feature_input):
+    @staticmethod
+    def __get_hyperparam(param_name: str, hp=None) -> Any:
+        # Compare against None rather than truthiness, so a legitimate
+        # 0.0 rate is not mistaken for a missing hyperparameter.
+        if (param := hp.get(param_name)) is not None:
+            if isinstance(param, float):
+                print(f"'{param_name}' is {param}")
+                return param
+            else:
+                raise ValueError(f"Unexpected '{param_name}': {param}")
+        else:
+            raise ValueError(f"'{param_name}' was not defined during initialization of the model.")
+
+    def forward(self,
+                line_input: torch.Tensor,
+                variable_input: torch.Tensor,
+                value_input: torch.Tensor,
+                feature_input: torch.Tensor):
         line_out, _ = self.line_lstm(line_input)
         line_out = self.line_dropout(line_out[:, -1, :])
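One behavioral consequence of this refactor: __get_hyperparam raises instead of falling back to the old 0.45 defaults, so every dropout rate must now be present in hp. (The Any return annotation also assumes from typing import Any at the top of the file, which this hunk does not show.) A usage sketch with invented shapes:

    # Illustrative only: shapes are (batch, seq_len, embedding_dim) tuples;
    # nn.LSTM above reads shape[2] as input_size and shape[1] as hidden_size.
    hp = {
        "value_lstm_dropout_rate": 0.45,
        "line_lstm_dropout_rate": 0.45,
        "variable_lstm_dropout_rate": 0.45,
        "dense_a_lstm_dropout_rate": 0.45,
        "dense_b_lstm_dropout_rate": 0.45,
    }
    model = MlModel((None, 40, 96), (None, 16, 96), (None, 32, 96), (None, 10), hp)
    # Dropping any key now raises ValueError instead of silently using 0.45.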
