tune lightgbm using OPTUNA
ronylpatil committed Oct 5, 2024
1 parent 122754c commit 968da99
Showing 4 changed files with 46 additions and 15 deletions.
12 changes: 6 additions & 6 deletions dvc.lock
@@ -158,18 +158,18 @@ stages:
       size: 1932798
     - path: ./src/models/tune_model.py
       hash: md5
-      md5: 068e865549b7a559aba40b3fb6ec4dbe
-      size: 7638
+      md5: 276d7dc0756ab95db5edd5545bc7346e
+      size: 8967
     params:
       params.yaml:
         base.target: Time_taken(min)
         feature_engineering.export_path: /data/processed
         mlflow.repo_name: delivery-time-estm
         mlflow.repo_owner: ronylpatil
-        tune_model.model_name: xgb_tunned
+        tune_model.model_name: lgbm_tunned
         tune_model.n_trials: 100
     outs:
-    - path: ./models/xgb_tunned.joblib
+    - path: ./models/lgbm_tunned.joblib
       hash: md5
-      md5: 72c2a3052e23d636a4ea72c49d3a276b
-      size: 165687
+      md5: 39c309fa4af074d60e6c248206ff13f4
+      size: 431794
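DVC verifies these hashes itself (`dvc status`), but if you want to confirm the tuned-model artifact by hand, here is a minimal sketch using only the values recorded in the lock entry above:

```python
import hashlib
import pathlib

# Recompute the checksum dvc.lock records for the tuned-model artifact.
artifact = pathlib.Path("models/lgbm_tunned.joblib")
print(hashlib.md5(artifact.read_bytes()).hexdigest())  # expect 39c309fa4af074d60e6c248206ff13f4
print(artifact.stat().st_size)                         # expect 431794
```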
1 change: 1 addition & 0 deletions models/.gitignore
@@ -2,3 +2,4 @@
 /xgb_tunned.joblib
 /model_xgb.joblib
 /model_lgbm.joblib
+/lgbm_tunned.joblib
4 changes: 2 additions & 2 deletions params.yaml
@@ -29,7 +29,7 @@ mlflow:
   repo_name: delivery-time-estm

 train_model:
-  # {decision_tree: 'dt', random_forest: 'rf', gradient boost: 'gb', xgboost: 'xgb', all models: 'all'}
+  # {decision_tree: 'dt', random_forest: 'rf', gradient boost: 'gb', xgboost: 'xgb', lightgbm: 'lightgbm'}
   model_to_train: lightgbm
   model_name: model_lgbm # IMP PLEASE CHANGE NAME

@@ -74,5 +74,5 @@ train_model:
     alpha: 0

 tune_model:
-  model_name: xgb_tunned
+  model_name: lgbm_tunned
   n_trials: 100
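The script below consumes this block via `params["n_trials"]`. The loading code itself isn't part of this commit, so the following is only a sketch of how `tune_model.py` presumably reads it:

```python
import yaml

# Read the tune_model block from params.yaml; n_trials feeds study.optimize
# and model_name determines the output artifact name.
with open("params.yaml") as f:
    params = yaml.safe_load(f)["tune_model"]

print(params["model_name"], params["n_trials"])  # lgbm_tunned 100
```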
44 changes: 37 additions & 7 deletions src/models/tune_model.py
@@ -5,6 +5,7 @@
 import pathlib
 import dagshub
 import pandas as pd
+from lightgbm import LGBMRegressor
 from xgboost import XGBRegressor  # type: ignore
 from mlflow.models import ModelSignature
 from mlflow.types.schema import Schema, ColSpec
@@ -86,6 +86,35 @@ def objective_xgb(trial) -> float:

     return score  # Return the neg MAE score for Optuna to maximize

+def objective_lgbm(trial) -> float:
+    # Suggest values for the hyperparameters
+    boosting_type = trial.suggest_categorical("boosting_type", ["gbdt", "dart"])
+    num_leaves = trial.suggest_int("num_leaves", 20, 50)
+    max_depth = trial.suggest_int("max_depth", 3, 7)
+    learning_rate = trial.suggest_float("learning_rate", 0.1, 0.3, step=0.01)
+    n_estimators = trial.suggest_int("n_estimators", 50, 200)
+    reg_alpha = trial.suggest_float("reg_alpha", 0.1, 0.5, step=0.1)
+    reg_lambda = trial.suggest_float("reg_lambda", 0.1, 0.5, step=0.1)
+    min_child_samples = trial.suggest_int("min_child_samples", 20, 200)
+
+    # Create the LGBMRegressor with the suggested hyperparameters
+    model = LGBMRegressor(
+        boosting_type=boosting_type,
+        num_leaves=num_leaves,
+        max_depth=max_depth,
+        learning_rate=learning_rate,
+        n_estimators=n_estimators,
+        reg_alpha=reg_alpha,
+        reg_lambda=reg_lambda,
+        min_child_samples=min_child_samples,
+    )
+
+    # Perform 5-fold cross-validation scored with negative MAE
+    score = cross_val_score(
+        model, x_train, y_train, cv=5, scoring="neg_mean_absolute_error"
+    ).mean()
+
+    return score  # Return the neg MAE score for Optuna to maximize

if __name__ == "__main__":

@@ -117,10 +147,10 @@ def objective_xgb(trial) -> float:
     study = optuna.create_study(
         direction="maximize", sampler=optuna.samplers.TPESampler()
     )  # maximizing neg MAE, i.e. minimizing MAE
-    study.optimize(objective_xgb, n_trials=params["n_trials"])
+    study.optimize(objective_lgbm, n_trials=params["n_trials"])

     # training model with optimized hyperparameters
-    best_model = XGBRegressor(**study.best_trial.params)
+    best_model = LGBMRegressor(**study.best_trial.params)
     best_model.fit(x_train, y_train)
     y_pred = best_model.predict(x_test)

@@ -130,13 +160,13 @@ def objective_xgb(trial) -> float:
     adj_r2 = adj_r2_score(r2_, x_train.shape[0], x_train.shape[1])

     # setting MLflow
-    mlflow.set_experiment("DTE [Fine Tunning XGB]")
+    mlflow.set_experiment("DTE [Fine Tuning LGBM]")
     experiment_description = (
-        "Tunning xgboost regressor."  # adding experiment description
+        "Tuning lightgbm regressor."  # adding experiment description
     )
     mlflow.set_experiment_tag("mlflow.note.content", experiment_description)

-    with mlflow.start_run(description="Tunning XGBRegressor - ronil"):
+    with mlflow.start_run(description="Tuning LGBMRegressor - ronil"):
         mlflow.log_params(study.best_trial.params)
         mlflow.log_params({"n_trials": params["n_trials"]})
         mlflow.log_metrics(
@@ -179,9 +209,9 @@ def objective_xgb(trial) -> float:
         # Create a signature
         signature = ModelSignature(inputs=input_schema, outputs=output_schema)

-        mlflow.sklearn.log_model(best_model, "tunned xgbR", signature=signature)
+        mlflow.sklearn.log_model(best_model, "tuned lgbmR", signature=signature)
         mlflow.set_tag("developer", "ronil")
-        mlflow.set_tag("model", "xgbR")
+        mlflow.set_tag("model", "lgbmR")
         mlflow.set_tag("objective", "neg_mean_absolute_error")
         infologger.info("Experiment tracked successfully.")

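For reference, here is a self-contained toy version of the tuning pattern this commit adds, run on synthetic data rather than the project's delivery-time dataset, to make the sign convention explicit: `neg_mean_absolute_error` is never positive, so a study that maximizes it is minimizing MAE, and negating `study.best_value` recovers the plain MAE. The data and search space below are illustrative, not the project's.

```python
import optuna
from lightgbm import LGBMRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score

# Synthetic stand-in for the project's train split.
X, y = make_regression(n_samples=500, n_features=10, noise=0.1, random_state=42)

def objective(trial):
    model = LGBMRegressor(
        num_leaves=trial.suggest_int("num_leaves", 20, 50),
        learning_rate=trial.suggest_float("learning_rate", 0.1, 0.3),
    )
    # neg_mean_absolute_error <= 0, so "maximize" drives MAE toward 0.
    return cross_val_score(
        model, X, y, cv=3, scoring="neg_mean_absolute_error"
    ).mean()

study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler())
study.optimize(objective, n_trials=5)
print("best MAE:", -study.best_value)  # negate to read it as a plain MAE
```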
