h2oai · shaunyogeshwaran · Feb 1, 2024 · Jan 3, 2024 · Jan 12, 2024 · Jan 18, 2024
diff --git a/h2o-bindings/bin/custom/python/gen_modelselection.py b/h2o-bindings/bin/custom/python/gen_modelselection.py
@@ -42,6 +42,23 @@ def coef_norm(self, predictor_size=None):
 
         :param predictor_size: predictor subset size, will only return model coefficients of that subset size.
         :return: list of Python Dicts of coefficients for all models built with different predictor numbers
+
+        :examples:
+
+        >>> import h2o
+        >>> from h2o.estimators import H2OModelSelectionEstimator
+        >>> h2o.init()
+        >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
+        >>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
+        >>> response = "GLEASON"
+        >>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
+        ...                                        seed=12345,
+        ...                                        mode="maxr")
+        >>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
+        >>> results = maxrModel.result()
+        >>> print(results)
+        >>> coeff_norm = maxrModel.coef_norm()
+        >>> print(coeff_norm)
         """
         model_ids = self._model_json["output"]["best_model_ids"]
         if not(self.actual_params["build_glm_model"]) and self.actual_params["mode"]=="maxrsweep":
@@ -95,6 +112,23 @@ def coef(self, predictor_size=None):
 
         :param predictor_size: predictor subset size, will only return model coefficients of that subset size.
         :return: list of Python Dicts of coefficients for all models built with different predictor numbers
+
+        :examples:
+
+        >>> import h2o
+        >>> from h2o.estimators import H2OModelSelectionEstimator
+        >>> h2o.init()
+        >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
+        >>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
+        >>> response = "GLEASON"
+        >>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
+        ...                                        seed=12345,
+        ...                                        mode="maxr")
+        >>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
+        >>> results = maxrModel.result()
+        >>> print(results)
+        >>> coeff = maxrModel.coef()
+        >>> print(coeff)
         """
         if not self.actual_params["build_glm_model"] and self.actual_params["mode"]=="maxrsweep":
             coef_names = self._model_json["output"]["coefficient_names"]
@@ -148,6 +182,7 @@ def coef(self, predictor_size=None):
     def result(self):
         """
         Get result frame that contains information about the model building process like for modelselection and anovaglm.
+
         :return: the H2OFrame that contains information about the model building process like for modelselection and anovaglm.
         """
         return H2OFrame._expr(expr=ExprNode("result", ASTId(self.key)))._frame(fill_cache=True)
@@ -225,3 +260,141 @@ def get_best_model_predictors(self):
 mode=maxr, the model returned is no longer guaranteed to have the best R2 value.
 """
 )
+
+examples = dict(
+    build_glm_model="""
+>>> import h2o
+>>> from h2o.estimators import H2OModelSelectionEstimator
+>>> h2o.init()
+>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
+>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
+>>> response = "GLEASON"
+>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
+...                                        seed=12345,
+...                                        mode="maxr",
+...                                        build_glm_model=False)
+>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
+>>> results = maxrModel.result()
+>>> print(results)
+>>> coeff = maxrModel.coef()
+>>> print(coeff)
+""",
+    influence="""
+>>> import h2o
+>>> from h2o.estimators import H2OModelSelectionEstimator
+>>> h2o.init()
+>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
+>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
+>>> response = "GLEASON"
+>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
+...                                        seed=12345,
+...                                        mode="maxr",
+...                                        influence="dfbetas")
+>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
+>>> results = maxrModel.result()
+>>> print(results)
+>>> coeff = maxrModel.coef()
+>>> print(coeff)
+""",
+    multinode_mode="""
+>>> import h2o
+>>> from h2o.estimators import H2OModelSelectionEstimator
+>>> h2o.init()
+>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
+>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
+>>> response = "GLEASON"
+>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
+...                                        seed=12345,
+...                                        mode="maxr",
+...                                        multinode_mode=False)
+>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
+>>> results = maxrModel.result()
+>>> print(results)
+>>> coeff = maxrModel.coef()
+>>> print(coeff)
+""",
+    nparallelism="""
+>>> import h2o
+>>> from h2o.estimators import H2OModelSelectionEstimator
+>>> h2o.init()
+>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
+>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
+>>> response = "GLEASON"
+>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
+...                                        seed=12345,
+...                                        mode="maxr",
+...                                        nparallelism=0)
+>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
+>>> results = maxrModel.result()
+>>> print(results)
+>>> coeff = maxrModel.coef()
+>>> print(coeff)
+""",
+    p_values_threshold="""
+>>> import h2o
+>>> from h2o.estimators import H2OModelSelectionEstimator
+>>> h2o.init()
+>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
+>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
+>>> response = "GLEASON"
+>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
+...                                        seed=12345,
+...                                        mode="maxr",
+...                                        p_values_threshold=0.0)
+>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
+>>> results = maxrModel.result()
+>>> print(results)
+>>> coeff = maxrModel.coef()
+>>> print(coeff)
+""",
+    early_stopping="""
+>>> import h2o
+>>> from h2o.estimators import H2OModelSelectionEstimator
+>>> h2o.init()
+>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
+>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
+>>> response = "GLEASON"
+>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
+...                                        seed=12345,
+...                                        mode="maxr",
+...                                        early_stopping=False)
+>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
+>>> results = maxrModel.result()
+>>> print(results)
+>>> coeff = maxrModel.coef()
+>>> print(coeff)
+""",
+    obj_reg="""
+>>> import h2o
+>>> from h2o.estimators import H2OModelSelectionEstimator
+>>> h2o.init()
+>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
+>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
+>>> response = "GLEASON"
+>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
+...                                        seed=12345,
+...                                        mode="maxr",
+...                                        obj_reg=-1.0)
+>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
+>>> results = maxrModel.result()
+>>> print(results)
+>>> coeff = maxrModel.coef()
+>>> print(coeff)
+""",
+    mode="""
+>>> import h2o
+>>> from h2o.estimators import H2OModelSelectionEstimator
+>>> h2o.init()
+>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
+>>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
+>>> response = "GLEASON"
+>>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=7,
+...                                        seed=12345,
+...                                        mode="maxr")
+>>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
+>>> results = maxrModel.result()
+>>> print(results)
+>>> coeff = maxrModel.coef()
+>>> print(coeff)
+"""
+)