training lightgbm [baseline model]
ronylpatil committed Oct 5, 2024
1 parent e281db2 commit 122754c
Showing 4 changed files with 92 additions and 9 deletions.
14 changes: 7 additions & 7 deletions dvc.lock
@@ -94,8 +94,8 @@ stages:
      size: 1932798
    - path: ./src/models/train_model.py
      hash: md5
      md5: 90f6f48a40e04b569de4dfe36fc48c71
      size: 16665
      md5: b082ca07b5418b81dee5bd4293f88c85
      size: 20270
    params:
      params.yaml:
        base.target: Time_taken(min)
@@ -120,8 +120,8 @@ stages:
          max_features: 0.7
          max_leaf_nodes: 50
          n_iter_no_change: 13
        train_model.model_name: model_xgb
        train_model.model_to_train: xgb
        train_model.model_name: model_lgbm
        train_model.model_to_train: lightgbm
        train_model.random_forest:
          n_estimators: 100
          criterion: squared_error
@@ -141,10 +141,10 @@ stages:
          lambda: 1
          alpha: 0
    outs:
    - path: ./models/model_xgb.joblib
    - path: ./models/model_lgbm.joblib
      hash: md5
      md5: 5b3624e0f22c4ef3ad891b53e5aac0fc
      size: 480723
      md5: 7838430e1973c7ca9572297fef4387d4
      size: 287673
  tune_model:
    cmd: python ./src/models/tune_model.py
    deps:
1 change: 1 addition & 0 deletions models/.gitignore
@@ -1,3 +1,4 @@
/model_rf.joblib
/xgb_tunned.joblib
/model_xgb.joblib
/model_lgbm.joblib
4 changes: 2 additions & 2 deletions params.yaml
@@ -30,8 +30,8 @@ mlflow:

train_model:
  # {decision_tree: 'dt', random_forest: 'rf', gradient boost: 'gb', xgboost: 'xgb', lightgbm: 'lightgbm', all models: 'all'}
  model_to_train: xgb
  model_name: model_xgb # IMP PLEASE CHANGE NAME
  model_to_train: lightgbm
  model_name: model_lgbm # IMP PLEASE CHANGE NAME

  decision_tree:
    criterion: squared_error
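For orientation, a minimal sketch of how train_model.py might consume this block, assuming params.yaml is read with PyYAML and the script branches on model_to_train (the actual loading code is not part of this commit and may differ, e.g. dvc.api):

import yaml  # assumption: PyYAML is used to read params.yaml

with open("params.yaml") as f:
    params = yaml.safe_load(f)["train_model"]

model_to_train = params["model_to_train"]  # "lightgbm" after this commit
model_name = params["model_name"]          # "model_lgbm", used for the .joblib filename

if model_to_train == "lightgbm":
    print(f"training LightGBM, output will be saved as {model_name}.joblib")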
82 changes: 82 additions & 0 deletions src/models/train_model.py
@@ -13,6 +13,7 @@
import inspect
import pathlib
import pandas as pd
import lightgbm as lgb
from datetime import datetime
from sklearn.base import BaseEstimator
from xgboost import XGBRegressor # type: ignore
@@ -350,6 +351,87 @@ def train_model(
            model=model_xgb, model_dir=model_dir, model_name=params["model_name"]
        )

    if model_to_train == "lightgbm":

        try:
            model_lgbm = lgb.LGBMRegressor() # ___change___
            model_lgbm.fit(x_train, y_train)
            infologger.info("[STEP-1] LightGBM fitted successfully.") # ___change___
        except Exception as e:
            infologger.error( # ___change___
                f"Failed to initialize LightGBM. [Error: {e}]. \n[File: {pathlib.Path(__file__)}]\n[Method: {inspect.currentframe().f_code.co_name}]"
            )
            raise  # re-raise so training does not continue with an undefined model

        y_pred = model_lgbm.predict(x_test)
        mae = mean_absolute_error(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)
        r2_ = r2_score(y_test, y_pred)
        adj_r2 = adj_r2_score(r2_, x_train.shape[0], x_train.shape[1])

        # setting up the MLflow experiment
        mlflow.set_experiment("DTE [LGBM]") # ___change___
        experiment_description = "Training lightgbm regressor." # adding experiment description # ___change___
        mlflow.set_experiment_tag("mlflow.note.content", experiment_description)

        with mlflow.start_run(description="Training LightGBM - ronil"): # ___change___
            # mlflow.log_params(params["xgb"])
            mlflow.log_metrics(
                {
                    "mae": round(mae, 3),
                    "mse": round(mse, 3),
                    "r2_score": round(r2_, 3),
                    "adj_r2": round(adj_r2, 3),
                }
            )

            curr_time = datetime.now().strftime("%d%m%y-%H%M%S")
            file_name = f"{home_dir}/figures/{curr_time}.png"
            residual_plot(y_test=y_test, y_pred=y_pred, path=file_name)
            mlflow.log_artifact(file_name, "residual plot")
            mlflow.log_artifact(__file__)  # logging code with mlflow

            # custom model signature
            input_schema = Schema(
                [
                    ColSpec("integer", "Age"),
                    ColSpec("float", "Ratings"),
                    ColSpec("integer", "Weatherconditions"),
                    ColSpec("integer", "Road_traffic_density"),
                    ColSpec("integer", "Vehicle_condition"),
                    ColSpec("integer", "Type_of_order"),
                    ColSpec("integer", "Type_of_vehicle"),
                    ColSpec("integer", "multiple_deliveries"),
                    ColSpec("integer", "Festival"),
                    ColSpec("integer", "City"),
                    ColSpec("float", "haversine_dist"),
                    ColSpec("float", "estm_time"),
                    ColSpec("float", "time_lag"),
                    ColSpec("float", "hour"),
                    ColSpec("integer", "day"),
                    ColSpec("integer", "is_weekend"),
                    ColSpec("integer", "is_rush"),
                ]
            )

            # Define a custom output schema
            output_schema = Schema([ColSpec("float", "Time_taken(min)")])

            # Create a signature
            signature = ModelSignature(inputs=input_schema, outputs=output_schema)

            mlflow.sklearn.log_model(
                model_lgbm, "lightgbm", signature=signature
            ) # ___change___
            mlflow.set_tag("developer", "ronil")
            mlflow.set_tag("model", "lightgbm") # ___change___
            infologger.info("[STEP-2] Experiment tracked successfully.")

        save_model(
            model=model_lgbm,
            model_dir=model_dir,
            model_name=params["model_name"], # ___change___
        )


def save_model(model: BaseEstimator, model_dir: str, model_name: str) -> None:
    try:
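The helper adj_r2_score and the remainder of save_model are outside the hunk shown above. A minimal sketch of what they plausibly look like, assuming the standard adjusted-R² formula and joblib serialization (consistent with the .joblib outputs tracked in dvc.lock, but not confirmed by this diff):

import pathlib
import joblib
from sklearn.base import BaseEstimator

def adj_r2_score(r2: float, n_samples: int, n_features: int) -> float:
    # standard adjusted R^2: penalizes R^2 for the number of predictors
    return 1 - (1 - r2) * (n_samples - 1) / (n_samples - n_features - 1)

def save_model(model: BaseEstimator, model_dir: str, model_name: str) -> None:
    # assumption: models are persisted with joblib, matching models/model_lgbm.joblib above
    joblib.dump(model, pathlib.Path(model_dir) / f"{model_name}.joblib")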

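Once a run is tracked, the logged LightGBM model can be pulled back from MLflow for a quick sanity check. A usage sketch, with the run ID as a hypothetical placeholder taken from the MLflow UI:

import mlflow.sklearn

run_id = "<run-id-from-the-mlflow-ui>"  # hypothetical placeholder, not a real run
model = mlflow.sklearn.load_model(f"runs:/{run_id}/lightgbm")  # "lightgbm" is the artifact path used in log_model above
print(model)  # model.predict expects a DataFrame with the 17 columns declared in the input schema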