Skip to content

Commit

Permalink
ht/requested updates
Browse files Browse the repository at this point in the history
- removed obj_reg, custom_metric_func, & multinode_mode
- updated coef, coef_norm, build_glm_model, influence, p_values_threshold, & mode
  • Loading branch information
hannah-tillman committed Jan 22, 2024
1 parent 195bc71 commit 353f1c3
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 165 deletions.
107 changes: 26 additions & 81 deletions h2o-bindings/bin/custom/python/gen_modelselection.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,14 @@ def coef_norm(self, predictor_size=None):
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
>>> response = "GLEASON"
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=5,
... seed=12345,
... mode="maxr")
>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
>>> results = maxrModel.result()
>>> print(results)
>>> coeff_norm = maxrModel.coef_norm()
>>> print(coeff_norm)
>>> coeff_norm_3 = maxrModel.coef_norm(predictor_size=3) # print coefficient norm with 3 predictors
>>> print(coeff_norm_3)
"""
model_ids = self._model_json["output"]["best_model_ids"]
if not(self.actual_params["build_glm_model"]) and self.actual_params["mode"]=="maxrsweep":
Expand Down Expand Up @@ -125,10 +125,10 @@ def coef(self, predictor_size=None):
... seed=12345,
... mode="maxr")
>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
>>> results = maxrModel.result()
>>> print(results)
>>> coeff = maxrModel.coef()
>>> print(coeff)
>>> coeff_3 = maxrModel.coef(predictor_size=3)
>>> print(coeff_3)
"""
if not self.actual_params["build_glm_model"] and self.actual_params["mode"]=="maxrsweep":
coef_names = self._model_json["output"]["coefficient_names"]
Expand Down Expand Up @@ -269,15 +269,17 @@ def get_best_model_predictors(self):
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
>>> response = "GLEASON"
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=5,
... seed=12345,
... mode="maxr",
... build_glm_model=False)
... mode="maxrsweep",
... build_glm_model=True)
>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
>>> results = maxrModel.result()
>>> print(results)
>>> coeff = maxrModel.coef()
>>> print(coeff)
>>> result = maxrModel.result()
>>> # get the GLM model with the best performance for a fixed predictor size:
>>> ind = 0  # row index into the result frame; choose the row for the predictor size you want
>>> one_model = h2o.get_model(result["model_id"][ind, 0])
>>> predict = one_model.predict(prostate)
>>> # print a version of the predict frame:
>>> print(predict)
""",
influence="""
>>> import h2o
Expand All @@ -286,32 +288,13 @@ def get_best_model_predictors(self):
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
>>> response = "GLEASON"
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=5,
... seed=12345,
... mode="maxr",
... influence="dfbetas")
>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
>>> results = maxrModel.result()
>>> print(results)
>>> coeff = maxrModel.coef()
>>> print(coeff)
""",
multinode_mode="""
>>> import h2o
>>> from h2o.estimators import H2OModelSelectionEstimator
>>> h2o.init()
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
>>> response = "GLEASON"
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
... seed=12345,
... mode="maxr",
... multinode_mode=False)
>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
>>> results = maxrModel.result()
>>> print(results)
>>> coeff = maxrModel.coef()
>>> print(coeff)
>>> glm_rid = maxrModel.get_regression_influence_diagnostics()
>>> print(glm_rid)
""",
nparallelism="""
>>> import h2o
Expand All @@ -335,51 +318,15 @@ def get_best_model_predictors(self):
>>> from h2o.estimators import H2OModelSelectionEstimator
>>> h2o.init()
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
>>> response = "GLEASON"
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
... seed=12345,
... mode="maxr",
... p_values_threshold=0.0)
>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
>>> results = maxrModel.result()
>>> print(results)
>>> coeff = maxrModel.coef()
>>> print(coeff)
""",
custom_metric_func="""
>>> import h2o
>>> from h2o.estimators import H2OModelSelectionEstimator
>>> h2o.init()
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
>>> response = "GLEASON"
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
... seed=12345,
... mode="maxr",
... early_stopping=False)
>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
>>> results = maxrModel.result()
>>> print(results)
>>> coeff = maxrModel.coef()
>>> print(coeff)
""",
obj_reg="""
>>> import h2o
>>> from h2o.estimators import H2OModelSelectionEstimator
>>> h2o.init()
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
>>> response = "GLEASON"
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
... seed=12345,
... mode="maxr",
... obj_reg=-1.0)
>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
>>> results = maxrModel.result()
>>> print(results)
>>> coeff = maxrModel.coef()
>>> print(coeff)
>>> backwardModel = H2OModelSelectionEstimator(min_predictor_number=2,
... seed=12345,
... mode="backward",
... p_values_threshold=0.001)
>>> backwardModel.train(x=predictors, y=response, training_frame=prostate)
>>> result = backwardModel.result()
>>> print(result)
""",
mode="""
>>> import h2o
Expand All @@ -388,13 +335,11 @@ def get_best_model_predictors(self):
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
>>> response = "GLEASON"
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=5,
... seed=12345,
... mode="maxr")
>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
>>> results = maxrModel.result()
>>> print(results)
>>> coeff = maxrModel.coef()
>>> print(coeff)
"""
)
110 changes: 26 additions & 84 deletions h2o-py/h2o/estimators/model_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -998,24 +998,6 @@ def obj_reg(self):
Likelihood divider in objective value computation, default (of -1.0) will set it to 1/nobs
Type: ``float``, defaults to ``-1.0``.
:examples:
>>> import h2o
>>> from h2o.estimators import H2OModelSelectionEstimator
>>> h2o.init()
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
>>> response = "GLEASON"
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
... seed=12345,
... mode="maxr",
... obj_reg=-1.0)
>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
>>> results = maxrModel.result()
>>> print(results)
>>> coeff = maxrModel.coef()
>>> print(coeff)
"""
return self._parms.get("obj_reg")

Expand Down Expand Up @@ -1148,24 +1130,6 @@ def custom_metric_func(self):
Reference to custom evaluation function, format: `language:keyName=funcName`
Type: ``str``.
:examples:
>>> import h2o
>>> from h2o.estimators import H2OModelSelectionEstimator
>>> h2o.init()
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
>>> response = "GLEASON"
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
... seed=12345,
... mode="maxr",
... early_stopping=False)
>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
>>> results = maxrModel.result()
>>> print(results)
>>> coeff = maxrModel.coef()
>>> print(coeff)
"""
return self._parms.get("custom_metric_func")

Expand Down Expand Up @@ -1253,14 +1217,12 @@ def mode(self):
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
>>> response = "GLEASON"
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=5,
... seed=12345,
... mode="maxr")
>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
>>> results = maxrModel.result()
>>> print(results)
>>> coeff = maxrModel.coef()
>>> print(coeff)
"""
return self._parms.get("mode")

Expand All @@ -1287,15 +1249,17 @@ def build_glm_model(self):
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
>>> response = "GLEASON"
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=5,
... seed=12345,
... mode="maxr",
... build_glm_model=False)
... mode="maxrsweep",
... build_glm_model=True)
>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
>>> results = maxrModel.result()
>>> print(results)
>>> coeff = maxrModel.coef()
>>> print(coeff)
>>> result = maxrModel.result()
>>> # get the GLM model with the best performance for a fixed predictor size:
>>> ind = 0  # row index into the result frame; choose the row for the predictor size you want
>>> one_model = h2o.get_model(result["model_id"][ind, 0])
>>> predict = one_model.predict(prostate)
>>> # print a version of the predict frame:
>>> print(predict)
"""
return self._parms.get("build_glm_model")

Expand All @@ -1318,17 +1282,15 @@ def p_values_threshold(self):
>>> from h2o.estimators import H2OModelSelectionEstimator
>>> h2o.init()
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
>>> response = "GLEASON"
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
... seed=12345,
... mode="maxr",
... p_values_threshold=0.0)
>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
>>> results = maxrModel.result()
>>> print(results)
>>> coeff = maxrModel.coef()
>>> print(coeff)
>>> backwardModel = H2OModelSelectionEstimator(min_predictor_number=2,
... seed=12345,
... mode="backward",
... p_values_threshold=0.001)
>>> backwardModel.train(x=predictors, y=response, training_frame=prostate)
>>> result = backwardModel.result()
>>> print(result)
"""
return self._parms.get("p_values_threshold")

Expand All @@ -1352,15 +1314,13 @@ def influence(self):
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
>>> response = "GLEASON"
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=5,
... seed=12345,
... mode="maxr",
... influence="dfbetas")
>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
>>> results = maxrModel.result()
>>> print(results)
>>> coeff = maxrModel.coef()
>>> print(coeff)
>>> glm_rid = maxrModel.get_regression_influence_diagnostics()
>>> print(glm_rid)
"""
return self._parms.get("influence")

Expand All @@ -1376,24 +1336,6 @@ def multinode_mode(self):
Defaults to false.
Type: ``bool``, defaults to ``False``.
:examples:
>>> import h2o
>>> from h2o.estimators import H2OModelSelectionEstimator
>>> h2o.init()
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
>>> response = "GLEASON"
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
... seed=12345,
... mode="maxr",
... multinode_mode=False)
>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
>>> results = maxrModel.result()
>>> print(results)
>>> coeff = maxrModel.coef()
>>> print(coeff)
"""
return self._parms.get("multinode_mode")

Expand Down Expand Up @@ -1455,14 +1397,14 @@ def coef_norm(self, predictor_size=None):
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
>>> response = "GLEASON"
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=5,
... seed=12345,
... mode="maxr")
>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
>>> results = maxrModel.result()
>>> print(results)
>>> coeff_norm = maxrModel.coef_norm()
>>> print(coeff_norm)
>>> coeff_norm_3 = maxrModel.coef_norm(predictor_size=3) # print coefficient norm with 3 predictors
>>> print(coeff_norm_3)
"""
model_ids = self._model_json["output"]["best_model_ids"]
if not(self.actual_params["build_glm_model"]) and self.actual_params["mode"]=="maxrsweep":
Expand Down Expand Up @@ -1529,10 +1471,10 @@ def coef(self, predictor_size=None):
... seed=12345,
... mode="maxr")
>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
>>> results = maxrModel.result()
>>> print(results)
>>> coeff = maxrModel.coef()
>>> print(coeff)
>>> coeff_3 = maxrModel.coef(predictor_size=3)
>>> print(coeff_3)
"""
if not self.actual_params["build_glm_model"] and self.actual_params["mode"]=="maxrsweep":
coef_names = self._model_json["output"]["coefficient_names"]
Expand Down

0 comments on commit 353f1c3

Please sign in to comment.