Skip to content

Commit

Permalink
Merge pull request #15945 from h2oai/sy/#15798
Browse files Browse the repository at this point in the history
GH-15798: Craft GAM Examples
  • Loading branch information
shaunyogeshwaran authored Mar 1, 2024
2 parents a5f89f7 + ec34235 commit a064dfc
Show file tree
Hide file tree
Showing 2 changed files with 323 additions and 6 deletions.
175 changes: 172 additions & 3 deletions h2o-bindings/bin/custom/python/gen_gam.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def scoring_history(self):

def get_knot_locations(self, gam_column=None):
"""
Retrieve gam columns knot locations if store_knot_location parameter is enabled. If a gam column name is
Retrieve gam columns knot locations if store_knot_locations parameter is enabled. If a gam column name is
specified, the knot locations corresponding to that gam column are returned. Otherwise, all knot locations are
returned for all gam columns. The order of the gam columns is specified in gam_knot_column_names of the
model output.
Expand All @@ -61,8 +61,8 @@ def get_knot_locations(self, gam_column=None):

def get_gam_knot_column_names(self):
"""
Retrieve gam column names corresponding to the knot locations that will be returned if store_knot_location
parameter is enabled.
Retrieve gam column names corresponding to the knot locations that will be returned if store_knot_locations
parameter is enabled.
:return: gam column names whose knot locations are stored in the knot_locations.
"""
Expand Down Expand Up @@ -117,3 +117,172 @@ def get_gam_knot_column_names(self):
MSE, AUC (for logistic regression), degrees of freedom, and confusion matrices.
"""
)
# Doctest-style usage snippets for the GAM estimator, keyed by the name of the
# parameter or method each snippet illustrates (e.g. ``bs``, ``num_knots``,
# ``get_knot_locations``).
# NOTE(review): presumably consumed by the bindings generator to fill in the
# "Examples" section of the generated docstrings -- confirm against the generator.
# The snippet bodies are kept flush-left (including the closing triple quotes)
# because any leading whitespace would become part of the example text.
examples = dict(
    bs="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv")
>>> h2o_data["C11"] = h2o_data["C11"].asfactor()
>>> y = "C11"
>>> x = ["C9","C10"]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial',
... gam_columns=["C6","C7","C8"],
... bs=[0,1,3])
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o_model.coef() # note the spline type in the names of gam column coefficients
""",
    gam_columns="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv")
>>> h2o_data["C11"] = h2o_data["C11"].asfactor()
>>> y = "C11"
>>> x = ["C9","C10"]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial',
... gam_columns=["C6","C7","C8"])
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o_model.coef()
""",
    get_gam_knot_column_names="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv")
>>> h2o_data["C11"] = h2o_data["C11"].asfactor()
>>> y = "C11"
>>> x = ["C9","C10"]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial',
... store_knot_locations=True,
... gam_columns=["C6","C7","C8"])
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o_model.get_gam_knot_column_names()
""",
    get_knot_locations="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv")
>>> h2o_data["C11"] = h2o_data["C11"].asfactor()
>>> y = "C11"
>>> x = ["C9","C10"]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial',
... store_knot_locations=True,
... gam_columns=["C6","C7","C8"])
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o_model.get_knot_locations()
""",
    keep_gam_cols="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv")
>>> h2o_data["C11"] = h2o_data["C11"].asfactor()
>>> train, test = h2o_data.split_frame(ratios = [.8])
>>> y = "C11"
>>> x = ["C9","C10"]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial',
... keep_gam_cols=True,
... gam_columns=["C6","C7","C8"])
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o.get_frame(h2o_model._model_json["output"] ["gam_transformed_center_key"])
""",
    knot_ids="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> knots1 = [-1.99905699, -0.98143075, 0.02599159, 1.00770987, 1.99942290]
>>> frameKnots1 = h2o.H2OFrame(python_obj=knots1)
>>> knots2 = [-1.999821861, -1.005257990, -0.006716042, 1.002197392, 1.999073589]
>>> frameKnots2 = h2o.H2OFrame(python_obj=knots2)
>>> knots3 = [-1.999675688, -0.979893796, 0.007573327, 1.011437347, 1.999611676]
>>> frameKnots3 = h2o.H2OFrame(python_obj=knots3)
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv")
>>> h2o_data["C11"] = h2o_data["C11"].asfactor()
>>> train, test = h2o_data.split_frame(ratios = [.8])
>>> y = "C11"
>>> x = ["C9","C10"]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial',
... gam_columns=["C6","C7","C8"],
... store_knot_locations=True,
... knot_ids=[frameKnots1.key, frameKnots2.key, frameKnots3.key])
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o_model.get_knot_locations()
""",
    num_knots="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv")
>>> h2o_data["C11"] = h2o_data["C11"].asfactor()
>>> train, test = h2o_data.split_frame(ratios = [.8])
>>> y = "C11"
>>> x = ["C9","C10"]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial',
... store_knot_locations=True,
... gam_columns=["C6","C7","C8"],
... num_knots=[3,4,5])
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o_model.get_knot_locations()
""",
    scale_tp_penalty_mat="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv")
>>> h2o_data["C11"] = h2o_data["C11"].asfactor()
>>> y = "C11"
>>> x = ["C9","C10"]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial',
... scale_tp_penalty_mat=True,
... gam_columns=["C6","C7","C8"],
... bs=[1,1,1])
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o_model.coef()
""",
    splines_non_negative="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/binomial_20_cols_10KRows.csv")
>>> y = "C21"
>>> x = ["C19","C20"]
>>> numKnots = [5,5,5]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='gaussian',
... gam_columns=["C16","C17","C18"],
... bs=[2,2,2],
... splines_non_negative=[True, True, True])
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o_model.coef()
""",
    spline_orders="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/binomial_20_cols_10KRows.csv")
>>> y = "C21"
>>> x = ["C19","C20"]
>>> numKnots = [5,5,5]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='gaussian',
... gam_columns=["C16","C17","C18"],
... bs=[2,2,2],
... spline_orders=[3,4,5])
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o_model.coef()
""",
    standardize_tp_gam_cols="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/binomial_20_cols_10KRows.csv")
>>> y = "C21"
>>> x = ["C19","C20"]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='gaussian',
... gam_columns=["C16","C17","C18"],
... bs=[1,1,1],
... standardize_tp_gam_cols=True)
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o_model.coef()
""",
)
Loading

0 comments on commit a064dfc

Please sign in to comment.