Skip to content

Commit

Permalink
ht/trimmed; shifted; built gradle
Browse files Browse the repository at this point in the history
  • Loading branch information
hannah-tillman committed Jan 17, 2024
1 parent 52ea462 commit e22126a
Show file tree
Hide file tree
Showing 2 changed files with 152 additions and 57 deletions.
108 changes: 51 additions & 57 deletions h2o-bindings/bin/custom/python/gen_upliftdrf.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,20 @@
>>> treatment_column = "treatment"
>>> data[treatment_column] = data[treatment_column].asfactor()
>>> train, valid = data.split_frame(ratios=[.8], seed=1234)
>>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10, max_depth=5, treatment_column=treatment_column, uplift_metric="KL", min_rows=10, seed=1234, auuc_type="qini", auuc_nbins=-1)
>>> uplift_model.train(x=predictors, y=response, training_frame=train, validation_frame=valid)
>>> perf = uplift_model.model_performance()
>>> pred = uplift_model.predict(valid)
>>> perf.plot_uplift(metric="gain", plot=True)
>>> perf.plot_uplift(metric="gain", plot=True, normalize=True)
>>> print(perf.auuc())
>>> print(perf.auuc(metric="lift"))
>>> print(perf.auuc_normalized(metric="lift"))
>>> print(perf.auuc_table())
>>> print(perf.thresholds_and_metric_scores())
>>> print(perf.qini())
>>> print(perf.aecu())
>>> print(perf.aecu_table())
>>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10,
... max_depth=5,
... treatment_column=treatment_column,
... uplift_metric="KL",
... min_rows=10,
... seed=1234,
... auuc_type="qini",
... auuc_nbins=-1)
>>> uplift_model.train(x=predictors,
... y=response,
... training_frame=train,
... validation_frame=valid)
>>> uplift_model.model_performance()
""",
auuc_type="""
>>> import h2o
Expand All @@ -36,20 +36,18 @@
>>> treatment_column = "treatment"
>>> data[treatment_column] = data[treatment_column].asfactor()
>>> train, valid = data.split_frame(ratios=[.8], seed=1234)
>>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10, max_depth=5, treatment_column=treatment_column, uplift_metric="KL", min_rows=10, seed=1234, auuc_type="qini")
>>> uplift_model.train(x=predictors, y=response, training_frame=train, validation_frame=valid)
>>> perf = uplift_model.model_performance()
>>> pred = uplift_model.predict(valid)
>>> perf.plot_uplift(metric="gain", plot=True)
>>> perf.plot_uplift(metric="gain", plot=True, normalize=True)
>>> print(perf.auuc())
>>> print(perf.auuc(metric="lift"))
>>> print(perf.auuc_normalized(metric="lift"))
>>> print(perf.auuc_table())
>>> print(perf.thresholds_and_metric_scores())
>>> print(perf.qini())
>>> print(perf.aecu())
>>> print(perf.aecu_table())
>>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10,
... max_depth=5,
... treatment_column=treatment_column,
... uplift_metric="KL",
... min_rows=10,
... seed=1234,
... auuc_type="qini")
>>> uplift_model.train(x=predictors,
... y=response,
... training_frame=train,
... validation_frame=valid)
>>> uplift_model.model_performance()
""",
treatment_column="""
>>> import h2o
Expand All @@ -62,20 +60,18 @@
>>> treatment_column = "treatment"
>>> data[treatment_column] = data[treatment_column].asfactor()
>>> train, valid = data.split_frame(ratios=[.8], seed=1234)
>>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10, max_depth=5, uplift_metric="KL", min_rows=10, seed=1234, auuc_type="qini", treatment_column="treatment")
>>> uplift_model.train(x=predictors, y=response, training_frame=train, validation_frame=valid)
>>> perf = uplift_model.model_performance()
>>> pred = uplift_model.predict(valid)
>>> perf.plot_uplift(metric="gain", plot=True)
>>> perf.plot_uplift(metric="gain", plot=True, normalize=True)
>>> print(perf.auuc())
>>> print(perf.auuc(metric="lift"))
>>> print(perf.auuc_normalized(metric="lift"))
>>> print(perf.auuc_table())
>>> print(perf.thresholds_and_metric_scores())
>>> print(perf.qini())
>>> print(perf.aecu())
>>> print(perf.aecu_table())
>>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10,
... max_depth=5,
... uplift_metric="KL",
... min_rows=10,
... seed=1234,
... auuc_type="qini",
... treatment_column="treatment")
>>> uplift_model.train(x=predictors,
... y=response,
... training_frame=train,
... validation_frame=valid)
>>> uplift_model.model_performance()
""",
uplift_metric="""
>>> import h2o
Expand All @@ -88,19 +84,17 @@
>>> treatment_column = "treatment"
>>> data[treatment_column] = data[treatment_column].asfactor()
>>> train, valid = data.split_frame(ratios=[.8], seed=1234)
>>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10, max_depth=5, min_rows=10, seed=1234, auuc_type="qini", treatment_column="treatment", uplift_metric="auto")
>>> uplift_model.train(x=predictors, y=response, training_frame=train, validation_frame=valid)
>>> perf = uplift_model.model_performance()
>>> pred = uplift_model.predict(valid)
>>> perf.plot_uplift(metric="gain", plot=True)
>>> perf.plot_uplift(metric="gain", plot=True, normalize=True)
>>> print(perf.auuc())
>>> print(perf.auuc(metric="lift"))
>>> print(perf.auuc_normalized(metric="lift"))
>>> print(perf.auuc_table())
>>> print(perf.thresholds_and_metric_scores())
>>> print(perf.qini())
>>> print(perf.aecu())
>>> print(perf.aecu_table())
>>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10,
... max_depth=5,
... min_rows=10,
... seed=1234,
... auuc_type="qini",
... treatment_column="treatment",
... uplift_metric="auto")
>>> uplift_model.train(x=predictors,
... y=response,
... training_frame=train,
... validation_frame=valid)
>>> uplift_model.model_performance()
"""
)
)
101 changes: 101 additions & 0 deletions h2o-py/h2o/estimators/uplift_random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,31 @@ def treatment_column(self):
to divide the dataset into treatment (value 1) and control (value 0) groups.
Type: ``str``, defaults to ``"treatment"``.
:examples:
>>> import h2o
>>> from h2o.estimators import H2OUpliftRandomForestEstimator
>>> h2o.init()
>>> data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/uplift/criteo_uplift_13k.csv")
>>> predictors = ["f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8"]
>>> response = "conversion"
>>> data[response] = data[response].asfactor()
>>> treatment_column = "treatment"
>>> data[treatment_column] = data[treatment_column].asfactor()
>>> train, valid = data.split_frame(ratios=[.8], seed=1234)
>>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10,
... max_depth=5,
... uplift_metric="KL",
... min_rows=10,
... seed=1234,
... auuc_type="qini",
... treatment_column="treatment")
>>> uplift_model.train(x=predictors,
... y=response,
... training_frame=train,
... validation_frame=valid)
>>> uplift_model.model_performance()
"""
return self._parms.get("treatment_column")

Expand All @@ -565,6 +590,31 @@ def uplift_metric(self):
Divergence metric used to find best split when building an uplift tree.
Type: ``Literal["auto", "kl", "euclidean", "chi_squared"]``, defaults to ``"auto"``.
:examples:
>>> import h2o
>>> from h2o.estimators import H2OUpliftRandomForestEstimator
>>> h2o.init()
>>> data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/uplift/criteo_uplift_13k.csv")
>>> predictors = ["f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8"]
>>> response = "conversion"
>>> data[response] = data[response].asfactor()
>>> treatment_column = "treatment"
>>> data[treatment_column] = data[treatment_column].asfactor()
>>> train, valid = data.split_frame(ratios=[.8], seed=1234)
>>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10,
... max_depth=5,
... min_rows=10,
... seed=1234,
... auuc_type="qini",
... treatment_column="treatment",
... uplift_metric="auto")
>>> uplift_model.train(x=predictors,
... y=response,
... training_frame=train,
... validation_frame=valid)
>>> uplift_model.model_performance()
"""
return self._parms.get("uplift_metric")

Expand All @@ -579,6 +629,31 @@ def auuc_type(self):
Metric used to calculate Area Under Uplift Curve.
Type: ``Literal["auto", "qini", "lift", "gain"]``, defaults to ``"auto"``.
:examples:
>>> import h2o
>>> from h2o.estimators import H2OUpliftRandomForestEstimator
>>> h2o.init()
>>> data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/uplift/criteo_uplift_13k.csv")
>>> predictors = ["f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8"]
>>> response = "conversion"
>>> data[response] = data[response].asfactor()
>>> treatment_column = "treatment"
>>> data[treatment_column] = data[treatment_column].asfactor()
>>> train, valid = data.split_frame(ratios=[.8], seed=1234)
>>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10,
... max_depth=5,
... treatment_column=treatment_column,
... uplift_metric="KL",
... min_rows=10,
... seed=1234,
... auuc_type="qini")
>>> uplift_model.train(x=predictors,
... y=response,
... training_frame=train,
... validation_frame=valid)
>>> uplift_model.model_performance()
"""
return self._parms.get("auuc_type")

Expand All @@ -593,6 +668,32 @@ def auuc_nbins(self):
Number of bins to calculate Area Under Uplift Curve.
Type: ``int``, defaults to ``-1``.
:examples:
>>> import h2o
>>> from h2o.estimators import H2OUpliftRandomForestEstimator
>>> h2o.init()
>>> data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/uplift/criteo_uplift_13k.csv")
>>> predictors = ["f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8"]
>>> response = "conversion"
>>> data[response] = data[response].asfactor()
>>> treatment_column = "treatment"
>>> data[treatment_column] = data[treatment_column].asfactor()
>>> train, valid = data.split_frame(ratios=[.8], seed=1234)
>>> uplift_model = H2OUpliftRandomForestEstimator(ntrees=10,
... max_depth=5,
... treatment_column=treatment_column,
... uplift_metric="KL",
... min_rows=10,
... seed=1234,
... auuc_type="qini",
... auuc_nbins=-1)
>>> uplift_model.train(x=predictors,
... y=response,
... training_frame=train,
... validation_frame=valid)
>>> uplift_model.model_performance()
"""
return self._parms.get("auuc_nbins")

Expand Down

0 comments on commit e22126a

Please sign in to comment.