Skip to content

Commit

Permalink
Merge pull request #15998 from h2oai/sy-#15989-DT
Browse files Browse the repository at this point in the history
GH-15989: Craft DT Example
  • Loading branch information
shaunyogeshwaran authored Aug 23, 2024
2 parents 10104e9 + 8cc37b2 commit cd01927
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 0 deletions.
59 changes: 59 additions & 0 deletions h2o-bindings/bin/custom/python/gen_dt.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,62 @@
Builds a Decision Tree (DT) on a preprocessed dataset.
"""
)
# Doctest-style usage snippets for H2ODecisionTreeEstimator, keyed by the
# name of the estimator parameter each snippet demonstrates. Every snippet
# loads the prostate dataset, converts the CAPSULE response to a factor,
# splits 70/30, trains a depth-5 tree and scores the held-out frame.
examples = {
    # Casts RACE to a factor so that there is a categorical predictor,
    # then trains with categorical_encoding="binary".
    "categorical_encoding": """
>>> import h2o
>>> from h2o.estimators import H2ODecisionTreeEstimator
>>> h2o.init()
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
>>> target_variable = 'CAPSULE'
>>> prostate["RACE"] = prostate["RACE"].asfactor()
>>> prostate[target_variable] = prostate[target_variable].asfactor()
>>> train, test = prostate.split_frame(ratios=[0.7])
>>> sdt_h2o = H2ODecisionTreeEstimator(model_id="decision_tree.hex",
... max_depth=5,
... categorical_encoding="binary")
>>> sdt_h2o.train(y=target_variable, training_frame=train)
>>> pred_test = sdt_h2o.predict(test)
""",
    # Adds a constant column ("const_1") to the frame before training with
    # ignore_const_cols=True.
    "ignore_const_cols": """
>>> import h2o
>>> from h2o.estimators import H2ODecisionTreeEstimator
>>> h2o.init()
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
>>> target_variable = 'CAPSULE'
>>> prostate[target_variable] = prostate[target_variable].asfactor()
>>> prostate["const_1"] = 6
>>> train, test = prostate.split_frame(ratios=[0.7])
>>> sdt_h2o = H2ODecisionTreeEstimator(model_id="decision_tree.hex",
... max_depth=5,
... ignore_const_cols=True)
>>> sdt_h2o.train(y=target_variable, training_frame=train)
>>> pred_test = sdt_h2o.predict(test)
""",
    # Minimal snippet: only max_depth is set on the estimator.
    "max_depth": """
>>> import h2o
>>> from h2o.estimators import H2ODecisionTreeEstimator
>>> h2o.init()
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
>>> target_variable = 'CAPSULE'
>>> prostate[target_variable] = prostate[target_variable].asfactor()
>>> train, test = prostate.split_frame(ratios=[0.7])
>>> sdt_h2o = H2ODecisionTreeEstimator(model_id="decision_tree.hex",
... max_depth=5)
>>> sdt_h2o.train(y=target_variable, training_frame=train)
>>> pred_test = sdt_h2o.predict(test)
""",
    # Same as the max_depth snippet, with min_rows=20 set as well.
    "min_rows": """
>>> import h2o
>>> from h2o.estimators import H2ODecisionTreeEstimator
>>> h2o.init()
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
>>> target_variable = 'CAPSULE'
>>> prostate[target_variable] = prostate[target_variable].asfactor()
>>> train, test = prostate.split_frame(ratios=[0.7])
>>> sdt_h2o = H2ODecisionTreeEstimator(model_id="decision_tree.hex",
... max_depth=5,
... min_rows=20)
>>> sdt_h2o.train(y=target_variable, training_frame=train)
>>> pred_test = sdt_h2o.predict(test)
""",
}
61 changes: 61 additions & 0 deletions h2o-py/h2o/estimators/decision_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,22 @@ def ignore_const_cols(self):
Ignore constant columns.
Type: ``bool``, defaults to ``True``.
:examples:
>>> import h2o
>>> from h2o.estimators import H2ODecisionTreeEstimator
>>> h2o.init()
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
>>> target_variable = 'CAPSULE'
>>> prostate[target_variable] = prostate[target_variable].asfactor()
>>> prostate["const_1"] = 6
>>> train, test = prostate.split_frame(ratios=[0.7])
>>> sdt_h2o = H2ODecisionTreeEstimator(model_id="decision_tree.hex",
... max_depth=5,
... ignore_const_cols=True)
>>> sdt_h2o.train(y=target_variable, training_frame=train)
>>> pred_test = sdt_h2o.predict(test)
"""
return self._parms.get("ignore_const_cols")

Expand All @@ -122,6 +138,22 @@ def categorical_encoding(self):
Type: ``Literal["auto", "enum", "one_hot_internal", "one_hot_explicit", "binary", "eigen", "label_encoder",
"sort_by_response", "enum_limited"]``, defaults to ``"auto"``.
:examples:
>>> import h2o
>>> from h2o.estimators import H2ODecisionTreeEstimator
>>> h2o.init()
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
>>> target_variable = 'CAPSULE'
>>> prostate["RACE"] = prostate["RACE"].asfactor()
>>> prostate[target_variable] = prostate[target_variable].asfactor()
>>> train, test = prostate.split_frame(ratios=[0.7])
>>> sdt_h2o = H2ODecisionTreeEstimator(model_id="decision_tree.hex",
... max_depth=5,
... categorical_encoding="binary")
>>> sdt_h2o.train(y=target_variable, training_frame=train)
>>> pred_test = sdt_h2o.predict(test)
"""
return self._parms.get("categorical_encoding")

Expand Down Expand Up @@ -164,6 +196,20 @@ def max_depth(self):
Max depth of tree.
Type: ``int``, defaults to ``20``.
:examples:
>>> import h2o
>>> from h2o.estimators import H2ODecisionTreeEstimator
>>> h2o.init()
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
>>> target_variable = 'CAPSULE'
>>> prostate[target_variable] = prostate[target_variable].asfactor()
>>> train, test = prostate.split_frame(ratios=[0.7])
>>> sdt_h2o = H2ODecisionTreeEstimator(model_id="decision_tree.hex",
... max_depth=5)
>>> sdt_h2o.train(y=target_variable, training_frame=train)
>>> pred_test = sdt_h2o.predict(test)
"""
return self._parms.get("max_depth")

Expand All @@ -178,6 +224,21 @@ def min_rows(self):
Fewest allowed (weighted) observations in a leaf.
Type: ``int``, defaults to ``10``.
:examples:
>>> import h2o
>>> from h2o.estimators import H2ODecisionTreeEstimator
>>> h2o.init()
>>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
>>> target_variable = 'CAPSULE'
>>> prostate[target_variable] = prostate[target_variable].asfactor()
>>> train, test = prostate.split_frame(ratios=[0.7])
>>> sdt_h2o = H2ODecisionTreeEstimator(model_id="decision_tree.hex",
... max_depth=5,
... min_rows=20)
>>> sdt_h2o.train(y=target_variable, training_frame=train)
>>> pred_test = sdt_h2o.predict(test)
"""
return self._parms.get("min_rows")

Expand Down

0 comments on commit cd01927

Please sign in to comment.