Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/rel-3.44.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
CI user committed Mar 1, 2024
2 parents bb5d0ee + a064dfc commit 70f43c9
Show file tree
Hide file tree
Showing 13 changed files with 490 additions and 8 deletions.
46 changes: 46 additions & 0 deletions h2o-bindings/bin/custom/python/gen_anovaglm.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def Lambda(self, value):
def result(self):
    """
    Fetch the result frame describing the model building process (used e.g. by modelselection and anovaglm).

    :return: the H2OFrame that contains information about the model building process like for modelselection
        and anovaglm.
    """
    # Build the "result" expression against this model's key ...
    result_expr = ExprNode("result", ASTId(self.key))
    pending_frame = H2OFrame._expr(expr=result_expr)
    # ... then resolve it, requesting that the frame cache be filled.
    return pending_frame._frame(fill_cache=True)
Expand Down Expand Up @@ -55,3 +56,48 @@ def result(self):
high p-values while those with more contributions will have low p-values.
"""
)
# Doctest-style usage snippets; each key names the estimator argument the snippet demonstrates.
examples = {
    "highest_interaction_term": """
>>> import h2o
>>> h2o.init()
>>> from h2o.estimators import H2OANOVAGLMEstimator
>>> train = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate_complete.csv.zip")
>>> x = ['AGE', 'VOL', 'DCAPS']
>>> y = 'CAPSULE'
>>> anova_model = H2OANOVAGLMEstimator(family='binomial',
... lambda_=0,
... missing_values_handling="skip",
... highest_interaction_term=2)
>>> anova_model.train(x=x, y=y, training_frame=train)
>>> anova_model.summary()
""",
    "link": """
>>> import h2o
>>> h2o.init()
>>> from h2o.estimators import H2OANOVAGLMEstimator
>>> train = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate_complete.csv.zip")
>>> x = ['AGE', 'VOL', 'DCAPS']
>>> y = 'CAPSULE'
>>> anova_model = H2OANOVAGLMEstimator(family='binomial',
... lambda_=0,
... missing_values_handling="skip",
... link="family_default")
>>> anova_model.train(x=x, y=y, training_frame=train)
>>> anova_model.summary()
""",
    "save_transformed_framekeys": """
>>> import h2o
>>> h2o.init()
>>> from h2o.estimators import H2OANOVAGLMEstimator
>>> train = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate_complete.csv.zip")
>>> x = ['AGE', 'VOL', 'DCAPS']
>>> y = 'CAPSULE'
>>> anova_model = H2OANOVAGLMEstimator(family='binomial',
... lambda_=0,
... missing_values_handling="skip",
... save_transformed_framekeys=True)
>>> anova_model.train(x=x, y=y, training_frame=train)
>>> transformFrame = h2o.get_frame(anova_model._model_json['output']['transformed_columns_key']['name'])
>>> print(transformFrame)
""",
}
175 changes: 172 additions & 3 deletions h2o-bindings/bin/custom/python/gen_gam.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def scoring_history(self):

def get_knot_locations(self, gam_column=None):
"""
Retrieve gam columns knot locations if store_knot_location parameter is enabled. If a gam column name is
Retrieve gam columns knot locations if store_knot_locations parameter is enabled. If a gam column name is
specified, the knot locations corresponding to that gam column are returned. Otherwise, all knot locations are
returned for all gam columns. The order of the gam columns are specified in gam_knot_column_names of the
model output.
Expand All @@ -61,8 +61,8 @@ def get_knot_locations(self, gam_column=None):

def get_gam_knot_column_names(self):
"""
Retrieve gam column names corresponding to the knot locations that will be returned if store_knot_location
parameter is enabled.
Retrieve gam column names corresponding to the knot locations that will be returned if store_knot_locations
parameter is enabled.
:return: gam column names whose knot locations are stored in the knot_locations.
"""
Expand Down Expand Up @@ -117,3 +117,172 @@ def get_gam_knot_column_names(self):
MSE, AUC (for logistic regression), degrees of freedom, and confusion matrices.
"""
)
# Doctest-style usage snippets for the GAM estimator; each key names the estimator argument or
# model method that the snippet demonstrates. Every snippet must be copy-paste runnable, so each
# ``h2o.import_file`` line has to be a complete, well-formed call.
examples = dict(
bs="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv")
>>> h2o_data["C11"] = h2o_data["C11"].asfactor()
>>> y = "C11"
>>> x = ["C9","C10"]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial',
... gam_columns=["C6","C7","C8"],
... bs=[0,1,3])
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o_model.coef() # note the spline type in the names of gam column coefficients
""",
gam_columns="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv")
>>> h2o_data["C11"] = h2o_data["C11"].asfactor()
>>> y = "C11"
>>> x = ["C9","C10"]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial',
... gam_columns=["C6","C7","C8"])
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o_model.coef()
""",
get_gam_knot_column_names="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv")
>>> h2o_data["C11"] = h2o_data["C11"].asfactor()
>>> y = "C11"
>>> x = ["C9","C10"]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial',
... store_knot_locations=True,
... gam_columns=["C6","C7","C8"])
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o_model.get_gam_knot_column_names()
""",
get_knot_locations="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv")
>>> h2o_data["C11"] = h2o_data["C11"].asfactor()
>>> y = "C11"
>>> x = ["C9","C10"]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial',
... store_knot_locations=True,
... gam_columns=["C6","C7","C8"])
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o_model.get_knot_locations()
""",
keep_gam_cols="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv")
>>> h2o_data["C11"] = h2o_data["C11"].asfactor()
>>> train, test = h2o_data.split_frame(ratios = [.8])
>>> y = "C11"
>>> x = ["C9","C10"]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial',
... keep_gam_cols=True,
... gam_columns=["C6","C7","C8"])
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o.get_frame(h2o_model._model_json["output"] ["gam_transformed_center_key"])
""",
knot_ids="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> knots1 = [-1.99905699, -0.98143075, 0.02599159, 1.00770987, 1.99942290]
>>> frameKnots1 = h2o.H2OFrame(python_obj=knots1)
>>> knots2 = [-1.999821861, -1.005257990, -0.006716042, 1.002197392, 1.999073589]
>>> frameKnots2 = h2o.H2OFrame(python_obj=knots2)
>>> knots3 = [-1.999675688, -0.979893796, 0.007573327, 1.011437347, 1.999611676]
>>> frameKnots3 = h2o.H2OFrame(python_obj=knots3)
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv")
>>> h2o_data["C11"] = h2o_data["C11"].asfactor()
>>> train, test = h2o_data.split_frame(ratios = [.8])
>>> y = "C11"
>>> x = ["C9","C10"]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial',
... gam_columns=["C6","C7","C8"],
... store_knot_locations=True,
... knot_ids=[frameKnots1.key, frameKnots2.key, frameKnots3.key])
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o_model.get_knot_locations()
""",
num_knots="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv")
>>> h2o_data["C11"] = h2o_data["C11"].asfactor()
>>> train, test = h2o_data.split_frame(ratios = [.8])
>>> y = "C11"
>>> x = ["C9","C10"]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial',
... store_knot_locations=True,
... gam_columns=["C6","C7","C8"],
... num_knots=[3,4,5])
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o_model.get_knot_locations()
""",
scale_tp_penalty_mat="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv")
>>> h2o_data["C11"] = h2o_data["C11"].asfactor()
>>> y = "C11"
>>> x = ["C9","C10"]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial',
... scale_tp_penalty_mat=True,
... gam_columns=["C6","C7","C8"],
... bs=[1,1,1])
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o_model.coef()
""",
splines_non_negative="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/binomial_20_cols_10KRows.csv")
>>> y = "C21"
>>> x = ["C19","C20"]
>>> numKnots = [5,5,5]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='gaussian',
... gam_columns=["C16","C17","C18"],
... bs=[2,2,2],
... splines_non_negative=[True, True, True])
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o_model.coef()
""",
spline_orders="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/binomial_20_cols_10KRows.csv")
>>> y = "C21"
>>> x = ["C19","C20"]
>>> numKnots = [5,5,5]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='gaussian',
... gam_columns=["C16","C17","C18"],
... bs=[2,2,2],
... spline_orders=[3,4,5])
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o_model.coef()
""",
standardize_tp_gam_cols="""
>>> import h2o
>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator
>>> h2o.init()
>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/binomial_20_cols_10KRows.csv")
>>> y = "C21"
>>> x = ["C19","C20"]
>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='gaussian',
... gam_columns=["C16","C17","C18"],
... bs=[1,1,1],
... standardize_tp_gam_cols=True)
>>> h2o_model.train(x=x, y=y, training_frame=h2o_data)
>>> h2o_model.coef()
""",
)
5 changes: 5 additions & 0 deletions h2o-core/src/main/java/water/H2O.java
Original file line number Diff line number Diff line change
Expand Up @@ -2410,6 +2410,11 @@ public static void main( String[] args ) {

// Validate arguments
validateArguments();

// Raise user warnings
if (H2O.ARGS.web_ip == null) {
Log.warn("SECURITY_WARNING: web_ip is not specified. H2O Rest API is listening on all available interfaces.");
}

Log.info("X-h2o-cluster-id: " + H2O.CLUSTER_ID);
Log.info("User name: '" + H2O.ARGS.user_name + "'");
Expand Down
5 changes: 4 additions & 1 deletion h2o-core/src/main/java/water/H2OStarter.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ public static void start(String[] args, String relativeResourcePath, boolean fin
if (!H2O.ARGS.disable_web) {
Log.info("");
String message = H2O.ARGS.disable_flow ? "Connect to H2O from your R/Python client: " : "Open H2O Flow in your web browser: ";
Log.info(message + H2O.getURL(NetworkInit.h2oHttpView.getScheme()));
message += H2O.ARGS.web_ip == null ?
H2O.getURL(NetworkInit.h2oHttpView.getScheme()) :
H2O.getURL(NetworkInit.h2oHttpView.getScheme(), H2O.ARGS.web_ip, H2O.API_PORT, H2O.ARGS.context_path);
Log.info(message);
Log.info("");
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ public void reconfigureLog4J() {
.addAttribute("target", "SYSTEM_OUT")
.add(layoutComponentBuilder));

builder.add(builder.newAppender("stderr", "Console")
.addAttribute("target", "SYSTEM_ERR")
.add(builder.newFilter("ThresholdFilter", Filter.Result.ACCEPT, Filter.Result.DENY).addAttribute("level", Level.ERROR))
.add(layoutComponentBuilder));

builder.add(newRollingFileAppenderComponent(builder, "R1", "1MB", _getLogFilePath.apply("trace"), pattern, Level.TRACE));
builder.add(newRollingFileAppenderComponent(builder, "R2", _maxLogFileSize, _getLogFilePath.apply("debug"), pattern, Level.DEBUG));
builder.add(newRollingFileAppenderComponent(builder, "R3", _maxLogFileSize, _getLogFilePath.apply("info"), pattern, Level.INFO));
Expand All @@ -92,6 +97,7 @@ public void reconfigureLog4J() {
builder.add(newRollingFileAppenderComponent(builder, "HTTPD", "1MB", _getLogFilePath.apply("httpd"), "%d{ISO8601} " + patternTail, Level.TRACE));

AppenderRefComponentBuilder consoleAppenderRef = builder.newAppenderRef("Console");
AppenderRefComponentBuilder stderrAppenderRef = builder.newAppenderRef("stderr");

// configure loggers:
List<AppenderRefComponentBuilder> appenderReferences = new ArrayList();
Expand All @@ -102,11 +108,12 @@ public void reconfigureLog4J() {
appenderReferences.add(builder.newAppenderRef("R5"));
appenderReferences.add(builder.newAppenderRef("R6"));
appenderReferences.add(consoleAppenderRef);
appenderReferences.add(stderrAppenderRef);

builder.add(newLoggerComponent(builder, "hex", appenderReferences));
builder.add(newLoggerComponent(builder, "water", appenderReferences));
builder.add(newLoggerComponent(builder, "ai.h2o", appenderReferences));
builder.add(builder.newRootLogger(String.valueOf(L4J_LVLS[_level])).add(consoleAppenderRef));
builder.add(builder.newRootLogger(String.valueOf(L4J_LVLS[_level])).add(consoleAppenderRef).add(stderrAppenderRef));

// Turn down the logging for some class hierarchies.
builder.add(newLoggerComponent(builder, "org.apache.http", appenderReferences, "WARN"));
Expand Down
5 changes: 5 additions & 0 deletions h2o-logging/impl-log4j2/src/main/resources/log4j2.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,15 @@
<!-- Console appender named "Console": writes formatted events to standard output. -->
<Console name="Console" target="SYSTEM_OUT">
<PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
</Console>
<Console name="stderr" target="SYSTEM_ERR"> <!-- second console appender, targeting standard error -->
<PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
<ThresholdFilter level="ERROR" onMatch="ACCEPT" onMismatch="DENY"/> <!-- threshold ERROR: matching events are accepted, all others denied -->
</Console>
</Appenders>
<Loggers>
<!-- Root logger at level "info", attached to both console appenders; the "stderr" appender
     applies its own ERROR threshold filter, so it only emits a subset of these events. -->
<Root level="info">
<AppenderRef ref="Console"/>
<AppenderRef ref="stderr"/>
</Root>
</Loggers>
</Configuration>
10 changes: 10 additions & 0 deletions h2o-py/h2o/backend/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,16 @@ def _launch_server(self, port, baseport, mmax, mmin, ea, nthreads, jvm_custom_ar
raise H2OServerError("Server wasn't able to start in %f seconds." % elapsed_time)
time.sleep(0.2)

security_warning_message = ""
if os.stat(self._stdout).st_size > 0:
stdout_file = open(self._stdout, encoding='utf-8')
for line in stdout_file:
if re.search("SECURITY_WARNING", line):
security_warning_message += line + "\n"
stdout_file.close()
if security_warning_message:
warn("\nServer process startup raise a security warning:\n" + str(security_warning_message))

@staticmethod
def _check_java(java, verbose):
jver_bytes = subprocess.check_output([java, "-version"], stderr=subprocess.STDOUT)
Expand Down
Loading

0 comments on commit 70f43c9

Please sign in to comment.