tune lightgbm using OPTUNA
ronylpatil committed Oct 5, 2024
1 parent 122754c commit 968da99
Showing 4 changed files with 46 additions and 15 deletions.
12 changes: 6 additions & 6 deletions dvc.lock
@@ -158,18 +158,18 @@ stages:
       size: 1932798
     - path: ./src/models/tune_model.py
       hash: md5
-      md5: 068e865549b7a559aba40b3fb6ec4dbe
-      size: 7638
+      md5: 276d7dc0756ab95db5edd5545bc7346e
+      size: 8967
     params:
       params.yaml:
         base.target: Time_taken(min)
         feature_engineering.export_path: /data/processed
         mlflow.repo_name: delivery-time-estm
         mlflow.repo_owner: ronylpatil
-        tune_model.model_name: xgb_tunned
+        tune_model.model_name: lgbm_tunned
         tune_model.n_trials: 100
     outs:
-    - path: ./models/xgb_tunned.joblib
+    - path: ./models/lgbm_tunned.joblib
       hash: md5
-      md5: 72c2a3052e23d636a4ea72c49d3a276b
-      size: 165687
+      md5: 39c309fa4af074d60e6c248206ff13f4
+      size: 431794
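DVC verifies these hashes itself (`dvc status`), but if you want to confirm the tuned-model artifact by hand, here is a minimal sketch using only the values recorded in the lock entry above:

```python
import hashlib
import pathlib

# Recompute the checksum dvc.lock records for the tuned-model artifact.
artifact = pathlib.Path("models/lgbm_tunned.joblib")
print(hashlib.md5(artifact.read_bytes()).hexdigest())  # expect 39c309fa4af074d60e6c248206ff13f4
print(artifact.stat().st_size)                         # expect 431794
```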
1 change: 1 addition & 0 deletions models/.gitignore
@@ -2,3 +2,4 @@
 /xgb_tunned.joblib
 /model_xgb.joblib
 /model_lgbm.joblib
+/lgbm_tunned.joblib
4 changes: 2 additions & 2 deletions params.yaml
@@ -29,7 +29,7 @@ mlflow:
   repo_name: delivery-time-estm

 train_model:
-  # {decision_tree: 'dt', random_forest: 'rf', gradient boost: 'gb', xgboost: 'xgb', all models: 'all'}
+  # {decision_tree: 'dt', random_forest: 'rf', gradient boost: 'gb', xgboost: 'xgb', lightgbm: 'lightgbm'}
   model_to_train: lightgbm
   model_name: model_lgbm # IMP PLEASE CHANGE NAME

@@ -74,5 +74,5 @@ train_model:
     alpha: 0

 tune_model:
-  model_name: xgb_tunned
+  model_name: lgbm_tunned
   n_trials: 100
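The script below consumes this block via `params["n_trials"]`. The loading code itself isn't part of this commit, so the following is only a sketch of how `tune_model.py` presumably reads it:

```python
import yaml

# Read the tune_model block from params.yaml; n_trials feeds study.optimize
# and model_name determines the output artifact name.
with open("params.yaml") as f:
    params = yaml.safe_load(f)["tune_model"]

print(params["model_name"], params["n_trials"])  # lgbm_tunned 100
```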
44 changes: 37 additions & 7 deletions src/models/tune_model.py
@@ -5,6 +5,7 @@
 import pathlib
 import dagshub
 import pandas as pd
+from lightgbm import LGBMRegressor
 from xgboost import XGBRegressor  # type: ignore
 from mlflow.models import ModelSignature
 from mlflow.types.schema import Schema, ColSpec
@@ -86,6 +86,35 @@ def objective_xgb(trial) -> float:

     return score  # Return the neg MAE score for Optuna to maximize

+def objective_lgbm(trial) -> float:
+    # Suggest values for the hyperparameters
+    boosting_type = trial.suggest_categorical("boosting_type", ["gbdt", "dart"])
+    num_leaves = trial.suggest_int("num_leaves", 20, 50)
+    max_depth = trial.suggest_int("max_depth", 3, 7)
+    learning_rate = trial.suggest_float("learning_rate", 0.1, 0.3, step=0.01)
+    n_estimators = trial.suggest_int("n_estimators", 50, 200)
+    reg_alpha = trial.suggest_float("reg_alpha", 0.1, 0.5, step=0.1)
+    reg_lambda = trial.suggest_float("reg_lambda", 0.1, 0.5, step=0.1)
+    min_child_samples = trial.suggest_int("min_child_samples", 20, 200)
+
+    # Create the LGBMRegressor with the suggested hyperparameters
+    model = LGBMRegressor(
+        boosting_type=boosting_type,
+        num_leaves=num_leaves,
+        max_depth=max_depth,
+        learning_rate=learning_rate,
+        n_estimators=n_estimators,
+        reg_alpha=reg_alpha,
+        reg_lambda=reg_lambda,
+        min_child_samples=min_child_samples,
+    )
+
+    # Perform 5-fold cross-validation scored with negative MAE
+    score = cross_val_score(
+        model, x_train, y_train, cv=5, scoring="neg_mean_absolute_error"
+    ).mean()
+
+    return score  # Return the neg MAE score for Optuna to maximize

if __name__ == "__main__":

@@ -117,10 +147,10 @@ def objective_xgb(trial) -> float:
     study = optuna.create_study(
         direction="maximize", sampler=optuna.samplers.TPESampler()
     )  # maximizing neg MAE, i.e. minimizing MAE
-    study.optimize(objective_xgb, n_trials=params["n_trials"])
+    study.optimize(objective_lgbm, n_trials=params["n_trials"])

     # training model with optimized hyperparameters
-    best_model = XGBRegressor(**study.best_trial.params)
+    best_model = LGBMRegressor(**study.best_trial.params)
     best_model.fit(x_train, y_train)
     y_pred = best_model.predict(x_test)

@@ -130,13 +160,13 @@ def objective_xgb(trial) -> float:
     adj_r2 = adj_r2_score(r2_, x_train.shape[0], x_train.shape[1])

     # setting MLflow
-    mlflow.set_experiment("DTE [Fine Tunning XGB]")
+    mlflow.set_experiment("DTE [Fine Tuning LGBM]")
     experiment_description = (
-        "Tunning xgboost regressor."  # adding experiment description
+        "Tuning lightgbm regressor."  # adding experiment description
     )
     mlflow.set_experiment_tag("mlflow.note.content", experiment_description)

-    with mlflow.start_run(description="Tunning XGBRegressor - ronil"):
+    with mlflow.start_run(description="Tuning LGBMRegressor - ronil"):
         mlflow.log_params(study.best_trial.params)
         mlflow.log_params({"n_trials": params["n_trials"]})
         mlflow.log_metrics(
@@ -179,9 +209,9 @@ def objective_xgb(trial) -> float:
         # Create a signature
         signature = ModelSignature(inputs=input_schema, outputs=output_schema)

-        mlflow.sklearn.log_model(best_model, "tunned xgbR", signature=signature)
+        mlflow.sklearn.log_model(best_model, "tuned lgbmR", signature=signature)
         mlflow.set_tag("developer", "ronil")
-        mlflow.set_tag("model", "xgbR")
+        mlflow.set_tag("model", "lgbmR")
         mlflow.set_tag("objective", "neg_mean_absolute_error")
         infologger.info("Experiment tracked successfully.")

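For reference, here is a self-contained toy version of the tuning pattern this commit adds, run on synthetic data rather than the project's delivery-time dataset, to make the sign convention explicit: `neg_mean_absolute_error` is never positive, so a study that maximizes it is minimizing MAE, and negating `study.best_value` recovers the plain MAE. The data and search space below are illustrative, not the project's.

```python
import optuna
from lightgbm import LGBMRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score

# Synthetic stand-in for the project's train split.
X, y = make_regression(n_samples=500, n_features=10, noise=0.1, random_state=42)

def objective(trial):
    model = LGBMRegressor(
        num_leaves=trial.suggest_int("num_leaves", 20, 50),
        learning_rate=trial.suggest_float("learning_rate", 0.1, 0.3),
    )
    # neg_mean_absolute_error <= 0, so "maximize" drives MAE toward 0.
    return cross_val_score(
        model, X, y, cv=3, scoring="neg_mean_absolute_error"
    ).mean()

study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler())
study.optimize(objective, n_trials=5)
print("best MAE:", -study.best_value)  # negate to read it as a plain MAE
```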
