diff --git a/dvc.lock b/dvc.lock index 82cb0bb..aec908a 100644 --- a/dvc.lock +++ b/dvc.lock @@ -94,8 +94,8 @@ stages: size: 1932798 - path: ./src/models/train_model.py hash: md5 - md5: 90f6f48a40e04b569de4dfe36fc48c71 - size: 16665 + md5: b082ca07b5418b81dee5bd4293f88c85 + size: 20270 params: params.yaml: base.target: Time_taken(min) @@ -120,8 +120,8 @@ stages: max_features: 0.7 max_leaf_nodes: 50 n_iter_no_change: 13 - train_model.model_name: model_xgb - train_model.model_to_train: xgb + train_model.model_name: model_lgbm + train_model.model_to_train: lightgbm train_model.random_forest: n_estimators: 100 criterion: squared_error @@ -141,10 +141,10 @@ stages: lambda: 1 alpha: 0 outs: - - path: ./models/model_xgb.joblib + - path: ./models/model_lgbm.joblib hash: md5 - md5: 5b3624e0f22c4ef3ad891b53e5aac0fc - size: 480723 + md5: 7838430e1973c7ca9572297fef4387d4 + size: 287673 tune_model: cmd: python ./src/models/tune_model.py deps: diff --git a/models/.gitignore b/models/.gitignore index 78deb3f..01f27cc 100644 --- a/models/.gitignore +++ b/models/.gitignore @@ -1,3 +1,4 @@ /model_rf.joblib /xgb_tunned.joblib /model_xgb.joblib +/model_lgbm.joblib diff --git a/params.yaml b/params.yaml index cf4a02c..a02e4bb 100644 --- a/params.yaml +++ b/params.yaml @@ -30,8 +30,8 @@ mlflow: train_model: # {decision_tree: 'dt', random_forest: 'rf', gradient boost: 'gb', xgboost: 'xgb', all models: 'all'} - model_to_train: xgb - model_name: model_xgb # IMP PLEASE CHANGE NAME + model_to_train: lightgbm + model_name: model_lgbm # IMP PLEASE CHANGE NAME decision_tree: criterion: squared_error diff --git a/src/models/train_model.py b/src/models/train_model.py index c36484e..88f1e9a 100644 --- a/src/models/train_model.py +++ b/src/models/train_model.py @@ -13,6 +13,7 @@ import inspect import pathlib import pandas as pd +import lightgbm as lgb from datetime import datetime from sklearn.base import BaseEstimator from xgboost import XGBRegressor # type: ignore @@ -350,6 +351,87 @@ def train_model( model=model_xgb, model_dir=model_dir, model_name=params["model_name"] ) + if model_to_train == "lightgbm": + + try: + model_lgbm = lgb.LGBMRegressor() # ___change___ + model_lgbm.fit(x_train, y_train) + infologger.info("[STEP-1] LightGBM fitted successfully.") # ___change___ + except Exception as e: + infologger.error( # ___change___ + f"Failed to initilize LightGBM. [Error: {e}]. \n[File: {pathlib.Path(__file__)}]\n[Method: {inspect.currentframe().f_code.co_name}]" + ) + + y_pred = model_lgbm.predict(x_test) + mae = mean_absolute_error(y_test, y_pred) + mse = mean_squared_error(y_test, y_pred) + r2_ = r2_score(y_test, y_pred) + adj_r2 = adj_r2_score(r2_, x_train.shape[0], x_train.shape[1]) + + # setting MLflow + mlflow.set_experiment("DTE [LGBM]") # ___change___ + experiment_description = "Training lightgbm regressor." # adding experiment description # ___change___ + mlflow.set_experiment_tag("mlflow.note.content", experiment_description) + + with mlflow.start_run(description="Training LightGBM - ronil"): # ___change___ + # mlflow.log_params(params["xgb"]) + mlflow.log_metrics( + { + "mae": round(mae, 3), + "mse": round(mse, 3), + "r2_score": round(r2_, 3), + "adj_r2": round(adj_r2, 3), + } + ) + + curr_time = datetime.now().strftime("%d%m%y-%H%M%S") + file_name = f"{home_dir}/figures/{curr_time}.png" + residual_plot(y_test=y_test, y_pred=y_pred, path=file_name) + mlflow.log_artifact(file_name, "residual plot") + mlflow.log_artifact(__file__) # loging code with mlflow + + # custom model signature + input_schema = Schema( + [ + ColSpec("integer", "Age"), + ColSpec("float", "Ratings"), + ColSpec("integer", "Weatherconditions"), + ColSpec("integer", "Road_traffic_density"), + ColSpec("integer", "Vehicle_condition"), + ColSpec("integer", "Type_of_order"), + ColSpec("integer", "Type_of_vehicle"), + ColSpec("integer", "multiple_deliveries"), + ColSpec("integer", "Festival"), + ColSpec("integer", "City"), + ColSpec("float", "haversine_dist"), + ColSpec("float", "estm_time"), + ColSpec("float", "time_lag"), + ColSpec("float", "hour"), + ColSpec("integer", "day"), + ColSpec("integer", "is_weekend"), + ColSpec("integer", "is_rush"), + ] + ) + + # Define a custom output schema + output_schema = Schema([ColSpec("float", "Time_taken(min)")]) + + # Create a signature + signature = ModelSignature(inputs=input_schema, outputs=output_schema) + + mlflow.sklearn.log_model( + model_lgbm, "lightgbm", signature=signature + ) # ___change___ + mlflow.set_tag("developer", "ronil") + mlflow.set_tag("model", "lightgbm") # ___change___ + infologger.info("[STEP-2] Experiment tracked successfully.") + + save_model( + model=model_lgbm, + model_dir=model_dir, + model_name=params["model_name"], # ___change___ + ) + def save_model(model: BaseEstimator, model_dir: str, model_name: str) -> None: try: