From 294bb68df9b70de4bea79c072850496ba69fc693 Mon Sep 17 00:00:00 2001
From: ronilpatil
Date: Sat, 28 Sep 2024 13:40:26 +0530
Subject: [PATCH] added model tuning into dvc pipeline and execute it

---
 dvc.lock                 | 32 ++++++++++++++++++++++++++++++--
 dvc.yaml                 | 16 ++++++++++++++++
 models/.gitignore        |  1 +
 params.yaml              |  4 ++++
 src/models/tune_model.py |  4 ++--
 5 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/dvc.lock b/dvc.lock
index 707cbb3..48ef139 100644
--- a/dvc.lock
+++ b/dvc.lock
@@ -94,8 +94,8 @@ stages:
       size: 1932798
     - path: ./src/models/train_model.py
       hash: md5
-      md5: 19e77eb2eec71235661cbea2f088c3d6
-      size: 16692
+      md5: 90f6f48a40e04b569de4dfe36fc48c71
+      size: 16665
     params:
       params.yaml:
         base.target: Time_taken(min)
@@ -145,3 +145,31 @@ stages:
       hash: md5
       md5: 4804de180ead9bca13523f33d5e725b0
       size: 866646
+  tune_model:
+    cmd: python ./src/models/tune_model.py
+    deps:
+    - path: ./data/processed/processed_test.csv
+      hash: md5
+      md5: 8fbf4dba54f10d8f1c9d6b1c85b81041
+      size: 435216
+    - path: ./data/processed/processed_train.csv
+      hash: md5
+      md5: ae10fc577ecbf3cfe7311c1af6e59e2c
+      size: 1932798
+    - path: ./src/models/tune_model.py
+      hash: md5
+      md5: 9f7e26afd8ac368de8c0de728cdd0fb2
+      size: 7639
+    params:
+      params.yaml:
+        base.target: Time_taken(min)
+        feature_engineering.export_path: /data/processed
+        mlflow.repo_name: delivery-time-estm
+        mlflow.repo_owner: ronylpatil
+        tune_model.model_name: xgb_tunned
+        tune_model.n_trials: 50
+    outs:
+    - path: ./models/xgb_tunned.joblib
+      hash: md5
+      md5: e45b2fd0bfcb258d1b3fa27cfdc77132
+      size: 253468
diff --git a/dvc.yaml b/dvc.yaml
index c8e9f84..bea4665 100644
--- a/dvc.yaml
+++ b/dvc.yaml
@@ -63,3 +63,19 @@ stages:
       - train_model.xgb
     outs:
       - ./models/${train_model.model_name}.joblib
+
+  tune_model:
+    cmd: python ./src/models/tune_model.py
+    deps:
+      - ./src/models/tune_model.py
+      - .${feature_engineering.export_path}/${feature_engineering.filename_train}.csv
+      - .${feature_engineering.export_path}/${feature_engineering.filename_test}.csv
+    params:
+      - base.target
+      - feature_engineering.export_path
+      - mlflow.repo_name
+      - mlflow.repo_owner
+      - tune_model.model_name
+      - tune_model.n_trials
+    outs:
+      - ./models/${tune_model.model_name}.joblib
diff --git a/models/.gitignore b/models/.gitignore
index 3332423..2e94487 100644
--- a/models/.gitignore
+++ b/models/.gitignore
@@ -1 +1,2 @@
 /model_rf.joblib
+/xgb_tunned.joblib
diff --git a/params.yaml b/params.yaml
index 8c5d78e..0178c86 100644
--- a/params.yaml
+++ b/params.yaml
@@ -72,3 +72,7 @@ train_model:
     max_depth: 7 # {}
     lambda: 1
     alpha: 0
+
+tune_model:
+  model_name: xgb_tunned
+  n_trials: 50
diff --git a/src/models/tune_model.py b/src/models/tune_model.py
index 7c50743..a8e9955 100644
--- a/src/models/tune_model.py
+++ b/src/models/tune_model.py
@@ -81,7 +81,7 @@ def objective_xgb(trial) -> float:

     # Perform 3-fold cross-validation and calculate accuracy
     score = cross_val_score(
-        model, x_train, y_train, cv=5, scoring="neg_mean_squared_error"
+        model, x_train, y_train, cv=5, scoring="neg_mean_absolute_error"
     ).mean()

     return score # Return the accuracy score for Optuna to maximize
@@ -182,7 +182,7 @@ def objective_xgb(trial) -> float:
         mlflow.sklearn.log_model(best_model, "tunned xgbR", signature=signature)
         mlflow.set_tag("developer", "ronil")
         mlflow.set_tag("model", "xgbR")
-        mlflow.set_tag("objective", "neg_mean_squared_error")
+        mlflow.set_tag("objective", "neg_mean_absolute_error")

         infologger.info("Experiment tracked successfully.")
         save_model(