Merge pull request #15998 from h2oai/sy-#15989-DT

GH-15989: Craft DT Example
h2oai · Aug 23, 2024 · cd01927
2 parents 10104e9 + 8cc37b2
commit cd01927
Show file tree

Hide file tree

Showing 2 changed files with 120 additions and 0 deletions.
diff --git a/h2o-bindings/bin/custom/python/gen_dt.py b/h2o-bindings/bin/custom/python/gen_dt.py
@@ -6,3 +6,62 @@
 Builds a Decision Tree (DT) on a preprocessed dataset.
 """
 )
+examples = dict(
+    categorical_encoding="""
+    >>> import h2o
+    >>> from h2o.estimators import H2ODecisionTreeEstimator
+    >>> h2o.init()
+    >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
+    >>> target_variable = 'CAPSULE'
+    >>> prostate["RACE"] = prostate["RACE"].asfactor()
+    >>> prostate[target_variable] = prostate[target_variable].asfactor()
+    >>> train, test = prostate.split_frame(ratios=[0.7])
+    >>> sdt_h2o = H2ODecisionTreeEstimator(model_id="decision_tree.hex",
+    ...                                    max_depth=5,
+    ...                                    categorical_encoding="binary")
+    >>> sdt_h2o.train(y=target_variable, training_frame=train)
+    >>> pred_test = sdt_h2o.predict(test)
+    """,
+    ignore_const_cols="""
+    >>> import h2o
+    >>> from h2o.estimators import H2ODecisionTreeEstimator
+    >>> h2o.init()
+    >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
+    >>> target_variable = 'CAPSULE'
+    >>> prostate[target_variable] = prostate[target_variable].asfactor()
+    >>> prostate["const_1"] = 6
+    >>> train, test = prostate.split_frame(ratios=[0.7])
+    >>> sdt_h2o = H2ODecisionTreeEstimator(model_id="decision_tree.hex",
+    ...                                    max_depth=5,
+    ...                                    ignore_const_cols=True)
+    >>> sdt_h2o.train(y=target_variable, training_frame=train)
+    >>> pred_test = sdt_h2o.predict(test)
+    """,
+    max_depth="""
+    >>> import h2o
+    >>> from h2o.estimators import H2ODecisionTreeEstimator
+    >>> h2o.init()
+    >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
+    >>> target_variable = 'CAPSULE'
+    >>> prostate[target_variable] = prostate[target_variable].asfactor()
+    >>> train, test = prostate.split_frame(ratios=[0.7])
+    >>> sdt_h2o = H2ODecisionTreeEstimator(model_id="decision_tree.hex",
+    ...                                    max_depth=5)
+    >>> sdt_h2o.train(y=target_variable, training_frame=train)
+    >>> pred_test = sdt_h2o.predict(test)
+    """,
+    min_rows="""
+    >>> import h2o
+    >>> from h2o.estimators import H2ODecisionTreeEstimator
+    >>> h2o.init()
+    >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
+    >>> target_variable = 'CAPSULE'
+    >>> prostate[target_variable] = prostate[target_variable].asfactor()
+    >>> train, test = prostate.split_frame(ratios=[0.7])
+    >>> sdt_h2o = H2ODecisionTreeEstimator(model_id="decision_tree.hex",
+    ...                                    max_depth=5,
+    ...                                    min_rows=20)
+    >>> sdt_h2o.train(y=target_variable, training_frame=train)
+    >>> pred_test = sdt_h2o.predict(test)
+    """
+)
diff --git a/h2o-py/h2o/estimators/decision_tree.py b/h2o-py/h2o/estimators/decision_tree.py
@@ -107,6 +107,22 @@ def ignore_const_cols(self):
         Ignore constant columns.
 
         Type: ``bool``, defaults to ``True``.
+
+        :examples:
+
+        >>> import h2o
+        >>> from h2o.estimators import H2ODecisionTreeEstimator
+        >>> h2o.init()
+        >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
+        >>> target_variable = 'CAPSULE'
+        >>> prostate[target_variable] = prostate[target_variable].asfactor()
+        >>> prostate["const_1"] = 6
+        >>> train, test = prostate.split_frame(ratios=[0.7])
+        >>> sdt_h2o = H2ODecisionTreeEstimator(model_id="decision_tree.hex",
+        ...                                    max_depth=5,
+        ...                                    ignore_const_cols=True)
+        >>> sdt_h2o.train(y=target_variable, training_frame=train)
+        >>> pred_test = sdt_h2o.predict(test)
         """
         return self._parms.get("ignore_const_cols")
 
@@ -122,6 +138,22 @@ def categorical_encoding(self):
 
         Type: ``Literal["auto", "enum", "one_hot_internal", "one_hot_explicit", "binary", "eigen", "label_encoder",
         "sort_by_response", "enum_limited"]``, defaults to ``"auto"``.
+
+        :examples:
+
+        >>> import h2o
+        >>> from h2o.estimators import H2ODecisionTreeEstimator
+        >>> h2o.init()
+        >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
+        >>> target_variable = 'CAPSULE'
+        >>> prostate["RACE"] = prostate["RACE"].asfactor()
+        >>> prostate[target_variable] = prostate[target_variable].asfactor()
+        >>> train, test = prostate.split_frame(ratios=[0.7])
+        >>> sdt_h2o = H2ODecisionTreeEstimator(model_id="decision_tree.hex",
+        ...                                    max_depth=5,
+        ...                                    categorical_encoding="binary")
+        >>> sdt_h2o.train(y=target_variable, training_frame=train)
+        >>> pred_test = sdt_h2o.predict(test)
         """
         return self._parms.get("categorical_encoding")
 
@@ -164,6 +196,20 @@ def max_depth(self):
         Max depth of tree.
 
         Type: ``int``, defaults to ``20``.
+
+        :examples:
+
+        >>> import h2o
+        >>> from h2o.estimators import H2ODecisionTreeEstimator
+        >>> h2o.init()
+        >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
+        >>> target_variable = 'CAPSULE'
+        >>> prostate[target_variable] = prostate[target_variable].asfactor()
+        >>> train, test = prostate.split_frame(ratios=[0.7])
+        >>> sdt_h2o = H2ODecisionTreeEstimator(model_id="decision_tree.hex",
+        ...                                    max_depth=5)
+        >>> sdt_h2o.train(y=target_variable, training_frame=train)
+        >>> pred_test = sdt_h2o.predict(test)
         """
         return self._parms.get("max_depth")
 
@@ -178,6 +224,21 @@ def min_rows(self):
         Fewest allowed (weighted) observations in a leaf.
 
         Type: ``int``, defaults to ``10``.
+
+        :examples:
+
+        >>> import h2o
+        >>> from h2o.estimators import H2ODecisionTreeEstimator
+        >>> h2o.init()
+        >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
+        >>> target_variable = 'CAPSULE'
+        >>> prostate[target_variable] = prostate[target_variable].asfactor()
+        >>> train, test = prostate.split_frame(ratios=[0.7])
+        >>> sdt_h2o = H2ODecisionTreeEstimator(model_id="decision_tree.hex",
+        ...                                    max_depth=5,
+        ...                                    min_rows=20)
+        >>> sdt_h2o.train(y=target_variable, training_frame=train)
+        >>> pred_test = sdt_h2o.predict(test)
         """
         return self._parms.get("min_rows")