diff --git a/h2o-bindings/bin/custom/python/gen_anovaglm.py b/h2o-bindings/bin/custom/python/gen_anovaglm.py index 7ff4fa781836..e86bc160f52c 100644 --- a/h2o-bindings/bin/custom/python/gen_anovaglm.py +++ b/h2o-bindings/bin/custom/python/gen_anovaglm.py @@ -13,6 +13,7 @@ def Lambda(self, value): def result(self): """ Get result frame that contains information about the model building process like for modelselection and anovaglm. + :return: the H2OFrame that contains information about the model building process like for modelselection and anovaglm. """ return H2OFrame._expr(expr=ExprNode("result", ASTId(self.key)))._frame(fill_cache=True) @@ -55,3 +56,48 @@ def result(self): high p-values while those with more contributions will have low p-values. """ ) +examples = dict( + highest_interaction_term=""" +>>> import h2o +>>> h2o.init() +>>> from h2o.estimators import H2OANOVAGLMEstimator +>>> train = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate_complete.csv.zip") +>>> x = ['AGE', 'VOL', 'DCAPS'] +>>> y = 'CAPSULE' +>>> anova_model = H2OANOVAGLMEstimator(family='binomial', +... lambda_=0, +... missing_values_handling="skip", +... highest_interaction_term=2) +>>> anova_model.train(x=x, y=y, training_frame=train) +>>> anova_model.summary() +""", + link=""" +>>> import h2o +>>> h2o.init() +>>> from h2o.estimators import H2OANOVAGLMEstimator +>>> train = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate_complete.csv.zip") +>>> x = ['AGE', 'VOL', 'DCAPS'] +>>> y = 'CAPSULE' +>>> anova_model = H2OANOVAGLMEstimator(family='binomial', +... lambda_=0, +... missing_values_handling="skip", +... 
link="family_default") +>>> anova_model.train(x=x, y=y, training_frame=train) +>>> anova_model.summary() +""", + save_transformed_framekeys=""" +>>> import h2o +>>> h2o.init() +>>> from h2o.estimators import H2OANOVAGLMEstimator +>>> train = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate_complete.csv.zip") +>>> x = ['AGE', 'VOL', 'DCAPS'] +>>> y = 'CAPSULE' +>>> anova_model = H2OANOVAGLMEstimator(family='binomial', +... lambda_=0, +... missing_values_handling="skip", +... save_transformed_framekeys=True) +>>> anova_model.train(x=x, y=y, training_frame=train) +>>> transformFrame = h2o.get_frame(anova_model._model_json['output']['transformed_columns_key']['name']) +>>> print(transformFrame) +""" +) diff --git a/h2o-bindings/bin/custom/python/gen_gam.py b/h2o-bindings/bin/custom/python/gen_gam.py index 76077d21221d..d78a75814746 100644 --- a/h2o-bindings/bin/custom/python/gen_gam.py +++ b/h2o-bindings/bin/custom/python/gen_gam.py @@ -40,7 +40,7 @@ def scoring_history(self): def get_knot_locations(self, gam_column=None): """ - Retrieve gam columns knot locations if store_knot_location parameter is enabled. If a gam column name is + Retrieve gam columns knot locations if store_knot_locations parameter is enabled. If a gam column name is specified, the know loations corresponding to that gam column is returned. Otherwise, all knot locations are returned for all gam columns. The order of the gam columns are specified in gam_knot_column_names of the model output. @@ -61,8 +61,8 @@ def get_knot_locations(self, gam_column=None): def get_gam_knot_column_names(self): """ - Retrieve gam column names corresponding to the knot locations that will be returned if store_knot_location - parameter is enabled. + Retrieve gam column names corresponding to the knot locations that will be returned if store_knot_locations + parameter is enabled. :return: gam column names whose knot locations are stored in the knot_locations. 
""" @@ -117,3 +117,172 @@ def get_gam_knot_column_names(self): MSE, AUC (for logistic regression), degrees of freedom, and confusion matrices. """ ) +examples = dict( + bs=""" +>>> import h2o +>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator +>>> h2o.init() +>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv") +>>> h2o_data["C11"] = h2o_data["C11"].asfactor() +>>> y = "C11" +>>> x = ["C9","C10"] +>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial', +... gam_columns=["C6","C7","C8"], +... bs=[0,1,3]) +>>> h2o_model.train(x=x, y=y, training_frame=h2o_data) +>>> h2o_model.coef() # note the spline type in the names of gam column coefficients +""", + gam_columns=""" +>>> import h2o +>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator +>>> h2o.init() +>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv") +>>> h2o_data["C11"] = h2o_data["C11"].asfactor() +>>> y = "C11" +>>> x = ["C9","C10"] +>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial', +... gam_columns=["C6","C7","C8"]) +>>> h2o_model.train(x=x, y=y, training_frame=h2o_data) +>>> h2o_model.coef() +""", + get_gam_knot_column_names=""" +>>> import h2o +>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator +>>> h2o.init() +>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv") +>>> h2o_data["C11"] = h2o_data["C11"].asfactor() +>>> y = "C11" +>>> x = ["C9","C10"] +>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial', +... store_knot_locations=True, +... 
gam_columns=["C6","C7","C8"]) +>>> h2o_model.train(x=x, y=y, training_frame=h2o_data) +>>> h2o_model.get_gam_knot_column_names() +""", + get_knot_locations=""" +>>> import h2o +>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator +>>> h2o.init() +>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv") +>>> h2o_data["C11"] = h2o_data["C11"].asfactor() +>>> y = "C11" +>>> x = ["C9","C10"] +>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial', +... store_knot_locations=True, +... gam_columns=["C6","C7","C8"]) +>>> h2o_model.train(x=x, y=y, training_frame=h2o_data) +>>> h2o_model.get_knot_locations() +""", + keep_gam_cols=""" +>>> import h2o +>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator +>>> h2o.init() +>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv") +>>> h2o_data["C11"] = h2o_data["C11"].asfactor() +>>> train, test = h2o_data.split_frame(ratios = [.8]) +>>> y = "C11" +>>> x = ["C9","C10"] +>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial', +... keep_gam_cols=True, +... 
gam_columns=["C6","C7","C8"]) +>>> h2o_model.train(x=x, y=y, training_frame=h2o_data) +>>> h2o.get_frame(h2o_model._model_json["output"] ["gam_transformed_center_key"]) +""", + knot_ids=""" +>>> import h2o +>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator +>>> h2o.init() +>>> knots1 = [-1.99905699, -0.98143075, 0.02599159, 1.00770987, 1.99942290] +>>> frameKnots1 = h2o.H2OFrame(python_obj=knots1) +>>> knots2 = [-1.999821861, -1.005257990, -0.006716042, 1.002197392, 1.999073589] +>>> frameKnots2 = h2o.H2OFrame(python_obj=knots2) +>>> knots3 = [-1.999675688, -0.979893796, 0.007573327, 1.011437347, 1.999611676] +>>> frameKnots3 = h2o.H2OFrame(python_obj=knots3) +>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv") +>>> h2o_data["C11"] = h2o_data["C11"].asfactor() +>>> train, test = h2o_data.split_frame(ratios = [.8]) +>>> y = "C11" +>>> x = ["C9","C10"] +>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial', +... gam_columns=["C6","C7","C8"], +... store_knot_locations=True, +... knot_ids=[frameKnots1.key, frameKnots2.key, frameKnots3.key]) +>>> h2o_model.train(x=x, y=y, training_frame=h2o_data) +>>> h2o_model.get_knot_locations() +""", + num_knots=""" +>>> import h2o +>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator +>>> h2o.init() +>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv") +>>> h2o_data["C11"] = h2o_data["C11"].asfactor() +>>> train, test = h2o_data.split_frame(ratios = [.8]) +>>> y = "C11" +>>> x = ["C9","C10"] +>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial', +... store_knot_locations=True, +... gam_columns=["C6","C7","C8"], +... 
num_knots=[3,4,5]) +>>> h2o_model.train(x=x, y=y, training_frame=h2o_data) +>>> h2o_model.get_knot_locations() +""", + scale_tp_penalty_mat=""" +>>> import h2o +>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator +>>> h2o.init() +>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv") +>>> h2o_data["C11"] = h2o_data["C11"].asfactor() +>>> y = "C11" +>>> x = ["C9","C10"] +>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial', +... scale_tp_penalty_mat=True, +... gam_columns=["C6","C7","C8"], +... bs=[1,1,1]) +>>> h2o_model.train(x=x, y=y, training_frame=h2o_data) +>>> h2o_model.coef() +""", + splines_non_negative=""" +>>> import h2o +>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator +>>> h2o.init() +>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/binomial_20_cols_10KRows.csv") +>>> y = "C21" +>>> x = ["C19","C20"] +>>> numKnots = [5,5,5] +>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='gaussian', +... gam_columns=["C16","C17","C18"], +... bs=[2,2,2], +... splines_non_negative=[True, True, True]) +>>> h2o_model.train(x=x, y=y, training_frame=h2o_data) +>>> h2o_model.coef() +""", + spline_orders=""" +>>> import h2o +>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator +>>> h2o.init() +>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/binomial_20_cols_10KRows.csv") +>>> y = "C21" +>>> x = ["C19","C20"] +>>> numKnots = [5,5,5] +>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='gaussian', +... gam_columns=["C16","C17","C18"], +... bs=[2,2,2], +... 
spline_orders=[3,4,5]) +>>> h2o_model.train(x=x, y=y, training_frame=h2o_data) +>>> h2o_model.coef() +""", + standardize_tp_gam_cols=""" +>>> import h2o +>>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator +>>> h2o.init() +>>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/binomial_20_cols_10KRows.csv") +>>> y = "C21" +>>> x = ["C19","C20"] +>>> h2o_model = H2OGeneralizedAdditiveEstimator(family='gaussian', +... gam_columns=["C16","C17","C18"], +... bs=[1,1,1], +... standardize_tp_gam_cols=True) +>>> h2o_model.train(x=x, y=y, training_frame=h2o_data) +>>> h2o_model.coef() +""", +) diff --git a/h2o-core/src/main/java/water/H2O.java b/h2o-core/src/main/java/water/H2O.java index 30f57863b423..96da2a76bd28 100644 --- a/h2o-core/src/main/java/water/H2O.java +++ b/h2o-core/src/main/java/water/H2O.java @@ -2410,6 +2410,11 @@ public static void main( String[] args ) { // Validate arguments validateArguments(); + + // Raise user warnings + if (H2O.ARGS.web_ip == null) { + Log.warn("SECURITY_WARNING: web_ip is not specified. H2O Rest API is listening on all available interfaces."); + } Log.info("X-h2o-cluster-id: " + H2O.CLUSTER_ID); Log.info("User name: '" + H2O.ARGS.user_name + "'"); diff --git a/h2o-core/src/main/java/water/H2OStarter.java b/h2o-core/src/main/java/water/H2OStarter.java index ddf99d840b96..ac4ecd98a8f5 100644 --- a/h2o-core/src/main/java/water/H2OStarter.java +++ b/h2o-core/src/main/java/water/H2OStarter.java @@ -39,7 +39,10 @@ public static void start(String[] args, String relativeResourcePath, boolean fin if (!H2O.ARGS.disable_web) { Log.info(""); String message = H2O.ARGS.disable_flow ? "Connect to H2O from your R/Python client: " : "Open H2O Flow in your web browser: "; - Log.info(message + H2O.getURL(NetworkInit.h2oHttpView.getScheme())); + message += H2O.ARGS.web_ip == null ? 
+ H2O.getURL(NetworkInit.h2oHttpView.getScheme()) : + H2O.getURL(NetworkInit.h2oHttpView.getScheme(), H2O.ARGS.web_ip, H2O.API_PORT, H2O.ARGS.context_path); + Log.info(message); Log.info(""); } } diff --git a/h2o-logging/impl-log4j2/src/main/java/water/util/LoggerBackend.java b/h2o-logging/impl-log4j2/src/main/java/water/util/LoggerBackend.java index 714bba01d945..886b18af8d2d 100644 --- a/h2o-logging/impl-log4j2/src/main/java/water/util/LoggerBackend.java +++ b/h2o-logging/impl-log4j2/src/main/java/water/util/LoggerBackend.java @@ -83,6 +83,11 @@ public void reconfigureLog4J() { .addAttribute("target", "SYSTEM_OUT") .add(layoutComponentBuilder)); + builder.add(builder.newAppender("stderr", "Console") + .addAttribute("target", "SYSTEM_ERR") + .add(builder.newFilter("ThresholdFilter", Filter.Result.ACCEPT, Filter.Result.DENY).addAttribute("level", Level.ERROR)) + .add(layoutComponentBuilder)); + builder.add(newRollingFileAppenderComponent(builder, "R1", "1MB", _getLogFilePath.apply("trace"), pattern, Level.TRACE)); builder.add(newRollingFileAppenderComponent(builder, "R2", _maxLogFileSize, _getLogFilePath.apply("debug"), pattern, Level.DEBUG)); builder.add(newRollingFileAppenderComponent(builder, "R3", _maxLogFileSize, _getLogFilePath.apply("info"), pattern, Level.INFO)); @@ -92,6 +97,7 @@ public void reconfigureLog4J() { builder.add(newRollingFileAppenderComponent(builder, "HTTPD", "1MB", _getLogFilePath.apply("httpd"), "%d{ISO8601} " + patternTail, Level.TRACE)); AppenderRefComponentBuilder consoleAppenderRef = builder.newAppenderRef("Console"); + AppenderRefComponentBuilder stderrAppenderRef = builder.newAppenderRef("stderr"); // configure loggers: List appenderReferences = new ArrayList(); @@ -102,11 +108,12 @@ public void reconfigureLog4J() { appenderReferences.add(builder.newAppenderRef("R5")); appenderReferences.add(builder.newAppenderRef("R6")); appenderReferences.add(consoleAppenderRef); + appenderReferences.add(stderrAppenderRef); 
builder.add(newLoggerComponent(builder, "hex", appenderReferences)); builder.add(newLoggerComponent(builder, "water", appenderReferences)); builder.add(newLoggerComponent(builder, "ai.h2o", appenderReferences)); - builder.add(builder.newRootLogger(String.valueOf(L4J_LVLS[_level])).add(consoleAppenderRef)); + builder.add(builder.newRootLogger(String.valueOf(L4J_LVLS[_level])).add(consoleAppenderRef).add(stderrAppenderRef)); // Turn down the logging for some class hierarchies. builder.add(newLoggerComponent(builder, "org.apache.http", appenderReferences, "WARN")); diff --git a/h2o-logging/impl-log4j2/src/main/resources/log4j2.xml b/h2o-logging/impl-log4j2/src/main/resources/log4j2.xml index 9d50683459f4..842d8aaa6888 100644 --- a/h2o-logging/impl-log4j2/src/main/resources/log4j2.xml +++ b/h2o-logging/impl-log4j2/src/main/resources/log4j2.xml @@ -4,10 +4,15 @@ + + + + + diff --git a/h2o-py/h2o/backend/server.py b/h2o-py/h2o/backend/server.py index b71e52d8e1bf..cab4512bf2cc 100644 --- a/h2o-py/h2o/backend/server.py +++ b/h2o-py/h2o/backend/server.py @@ -372,6 +372,16 @@ def _launch_server(self, port, baseport, mmax, mmin, ea, nthreads, jvm_custom_ar raise H2OServerError("Server wasn't able to start in %f seconds." 
% elapsed_time) time.sleep(0.2) + security_warning_message = "" + if os.stat(self._stdout).st_size > 0: + stdout_file = open(self._stdout, encoding='utf-8') + for line in stdout_file: + if re.search("SECURITY_WARNING", line): + security_warning_message += line + "\n" + stdout_file.close() + if security_warning_message: + warn("\nServer process startup raise a security warning:\n" + str(security_warning_message)) + @staticmethod def _check_java(java, verbose): jver_bytes = subprocess.check_output([java, "-version"], stderr=subprocess.STDOUT) diff --git a/h2o-py/h2o/estimators/anovaglm.py b/h2o-py/h2o/estimators/anovaglm.py index 3cf7d2d6f3c4..ea01bac719c5 100644 --- a/h2o-py/h2o/estimators/anovaglm.py +++ b/h2o-py/h2o/estimators/anovaglm.py @@ -530,6 +530,21 @@ def link(self): Type: ``Literal["family_default", "identity", "logit", "log", "inverse", "tweedie", "ologit"]``, defaults to ``"family_default"``. + + :examples: + + >>> import h2o + >>> h2o.init() + >>> from h2o.estimators import H2OANOVAGLMEstimator + >>> train = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate_complete.csv.zip") + >>> x = ['AGE', 'VOL', 'DCAPS'] + >>> y = 'CAPSULE' + >>> anova_model = H2OANOVAGLMEstimator(family='binomial', + ... lambda_=0, + ... missing_values_handling="skip", + ... link="family_default") + >>> anova_model.train(x=x, y=y, training_frame=train) + >>> anova_model.summary() """ return self._parms.get("link") @@ -722,6 +737,22 @@ def save_transformed_framekeys(self): true to save the keys of transformed predictors and interaction column. Type: ``bool``, defaults to ``False``. + + :examples: + + >>> import h2o + >>> h2o.init() + >>> from h2o.estimators import H2OANOVAGLMEstimator + >>> train = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate_complete.csv.zip") + >>> x = ['AGE', 'VOL', 'DCAPS'] + >>> y = 'CAPSULE' + >>> anova_model = H2OANOVAGLMEstimator(family='binomial', + ... lambda_=0, + ... 
missing_values_handling="skip", + ... save_transformed_framekeys=True) + >>> anova_model.train(x=x, y=y, training_frame=train) + >>> transformFrame = h2o.get_frame(anova_model._model_json['output']['transformed_columns_key']['name']) + >>> print(transformFrame) """ return self._parms.get("save_transformed_framekeys") @@ -737,6 +768,21 @@ def highest_interaction_term(self): on... Default to 2. Type: ``int``, defaults to ``0``. + + :examples: + + >>> import h2o + >>> h2o.init() + >>> from h2o.estimators import H2OANOVAGLMEstimator + >>> train = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate_complete.csv.zip") + >>> x = ['AGE', 'VOL', 'DCAPS'] + >>> y = 'CAPSULE' + >>> anova_model = H2OANOVAGLMEstimator(family='binomial', + ... lambda_=0, + ... missing_values_handling="skip", + ... highest_interaction_term=2) + >>> anova_model.train(x=x, y=y, training_frame=train) + >>> anova_model.summary() """ return self._parms.get("highest_interaction_term") @@ -785,6 +831,7 @@ def Lambda(self, value): def result(self): """ Get result frame that contains information about the model building process like for modelselection and anovaglm. + :return: the H2OFrame that contains information about the model building process like for modelselection and anovaglm. """ return H2OFrame._expr(expr=ExprNode("result", ASTId(self.key)))._frame(fill_cache=True) diff --git a/h2o-py/h2o/estimators/gam.py b/h2o-py/h2o/estimators/gam.py index e1fb334f020d..bcba5dcf4a4c 100644 --- a/h2o-py/h2o/estimators/gam.py +++ b/h2o-py/h2o/estimators/gam.py @@ -888,6 +888,22 @@ def splines_non_negative(self): true. Type: ``List[bool]``. 
+ + :examples: + + >>> import h2o + >>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator + >>> h2o.init() + >>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/binomial_20_cols_10KRows.csv") + >>> y = "C21" + >>> x = ["C19","C20"] + >>> numKnots = [5,5,5] + >>> h2o_model = H2OGeneralizedAdditiveEstimator(family='gaussian', + ... gam_columns=["C16","C17","C18"], + ... bs=[2,2,2], + ... splines_non_negative=[True, True, True]) + >>> h2o_model.train(x=x, y=y, training_frame=h2o_data) + >>> h2o_model.coef() """ return self._parms.get("splines_non_negative") @@ -1273,6 +1289,23 @@ def num_knots(self): I-splines, mininum = 2, for cs spline, minimum = 3. For thin plate, minimum is size of polynomial basis + 2. Type: ``List[int]``. + + :examples: + + >>> import h2o + >>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator + >>> h2o.init() + >>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv") + >>> h2o_data["C11"] = h2o_data["C11"].asfactor() + >>> train, test = h2o_data.split_frame(ratios = [.8]) + >>> y = "C11" + >>> x = ["C9","C10"] + >>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial', + ... store_knot_locations=True, + ... gam_columns=["C6","C7","C8"], + ... num_knots=[3,4,5]) + >>> h2o_model.train(x=x, y=y, training_frame=h2o_data) + >>> h2o_model.get_knot_locations() """ return self._parms.get("num_knots") @@ -1290,6 +1323,22 @@ def spline_orders(self): be ignored. Type: ``List[int]``. + + :examples: + + >>> import h2o + >>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator + >>> h2o.init() + >>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/binomial_20_cols_10KRows.csv") + >>> y = "C21" + >>> x = ["C19","C20"] + >>> numKnots = [5,5,5] + >>> h2o_model = H2OGeneralizedAdditiveEstimator(family='gaussian', + ... 
gam_columns=["C16","C17","C18"], + ... bs=[2,2,2], + ... spline_orders=[3,4,5]) + >>> h2o_model.train(x=x, y=y, training_frame=h2o_data) + >>> h2o_model.coef() """ return self._parms.get("spline_orders") @@ -1304,6 +1353,29 @@ def knot_ids(self): Array storing frame keys of knots. One for each gam column set specified in gam_columns Type: ``List[str]``. + + :examples: + + >>> import h2o + >>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator + >>> h2o.init() + >>> knots1 = [-1.99905699, -0.98143075, 0.02599159, 1.00770987, 1.99942290] + >>> frameKnots1 = h2o.H2OFrame(python_obj=knots1) + >>> knots2 = [-1.999821861, -1.005257990, -0.006716042, 1.002197392, 1.999073589] + >>> frameKnots2 = h2o.H2OFrame(python_obj=knots2) + >>> knots3 = [-1.999675688, -0.979893796, 0.007573327, 1.011437347, 1.999611676] + >>> frameKnots3 = h2o.H2OFrame(python_obj=knots3) + >>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv") + >>> h2o_data["C11"] = h2o_data["C11"].asfactor() + >>> train, test = h2o_data.split_frame(ratios = [.8]) + >>> y = "C11" + >>> x = ["C9","C10"] + >>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial', + ... gam_columns=["C6","C7","C8"], + ... store_knot_locations=True, + ... knot_ids=[frameKnots1.key, frameKnots2.key, frameKnots3.key]) + >>> h2o_model.train(x=x, y=y, training_frame=h2o_data) + >>> h2o_model.get_knot_locations() """ return self._parms.get("knot_ids") @@ -1319,6 +1391,20 @@ def gam_columns(self): {{'c1'},{'c2','c3'},{'c4'},...} Type: ``List[List[str]]``. 
+ + :examples: + + >>> import h2o + >>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator + >>> h2o.init() + >>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv") + >>> h2o_data["C11"] = h2o_data["C11"].asfactor() + >>> y = "C11" + >>> x = ["C9","C10"] + >>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial', + ... gam_columns=["C6","C7","C8"]) + >>> h2o_model.train(x=x, y=y, training_frame=h2o_data) + >>> h2o_model.coef() """ return self._parms.get("gam_columns") @@ -1335,6 +1421,21 @@ def standardize_tp_gam_cols(self): standardize tp (thin plate) predictor columns Type: ``bool``, defaults to ``False``. + + :examples: + + >>> import h2o + >>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator + >>> h2o.init() + >>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/binomial_20_cols_10KRows.csv") + >>> y = "C21" + >>> x = ["C19","C20"] + >>> h2o_model = H2OGeneralizedAdditiveEstimator(family='gaussian', + ... gam_columns=["C16","C17","C18"], + ... bs=[1,1,1], + ... standardize_tp_gam_cols=True) + >>> h2o_model.train(x=x, y=y, training_frame=h2o_data) + >>> h2o_model.coef() """ return self._parms.get("standardize_tp_gam_cols") @@ -1349,6 +1450,22 @@ def scale_tp_penalty_mat(self): Scale penalty matrix for tp (thin plate) smoothers as in R Type: ``bool``, defaults to ``False``. + + :examples: + + >>> import h2o + >>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator + >>> h2o.init() + >>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv") + >>> h2o_data["C11"] = h2o_data["C11"].asfactor() + >>> y = "C11" + >>> x = ["C9","C10"] + >>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial', + ... scale_tp_penalty_mat=True, + ... gam_columns=["C6","C7","C8"], + ... 
bs=[1,1,1]) + >>> h2o_model.train(x=x, y=y, training_frame=h2o_data) + >>> h2o_model.coef() """ return self._parms.get("scale_tp_penalty_mat") @@ -1365,6 +1482,21 @@ def bs(self): specified, must be the same size as gam_columns Type: ``List[int]``. + + :examples: + + >>> import h2o + >>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator + >>> h2o.init() + >>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv") + >>> h2o_data["C11"] = h2o_data["C11"].asfactor() + >>> y = "C11" + >>> x = ["C9","C10"] + >>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial', + ... gam_columns=["C6","C7","C8"], + ... bs=[0,1,3]) + >>> h2o_model.train(x=x, y=y, training_frame=h2o_data) + >>> h2o_model.coef() # note the spline type in the names of gam column coefficients """ return self._parms.get("bs") @@ -1393,6 +1525,22 @@ def keep_gam_cols(self): Save keys of model matrix Type: ``bool``, defaults to ``False``. + + :examples: + + >>> import h2o + >>> from h2o.estimators.gam import H2OGeneralizedAdditiveEstimator + >>> h2o.init() + >>> h2o_data = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/glm_test/multinomial_10_classes_10_cols_10000_Rows_train.csv") + >>> h2o_data["C11"] = h2o_data["C11"].asfactor() + >>> train, test = h2o_data.split_frame(ratios = [.8]) + >>> y = "C11" + >>> x = ["C9","C10"] + >>> h2o_model = H2OGeneralizedAdditiveEstimator(family='multinomial', + ... keep_gam_cols=True, + ... gam_columns=["C6","C7","C8"]) + >>> h2o_model.train(x=x, y=y, training_frame=h2o_data) + >>> h2o.get_frame(h2o_model._model_json["output"] ["gam_transformed_center_key"]) """ return self._parms.get("keep_gam_cols") @@ -1472,7 +1620,7 @@ def scoring_history(self): def get_knot_locations(self, gam_column=None): """ - Retrieve gam columns knot locations if store_knot_location parameter is enabled. 
If a gam column name is + Retrieve gam columns knot locations if store_knot_locations parameter is enabled. If a gam column name is specified, the know loations corresponding to that gam column is returned. Otherwise, all knot locations are returned for all gam columns. The order of the gam columns are specified in gam_knot_column_names of the model output. @@ -1493,8 +1641,8 @@ def get_knot_locations(self, gam_column=None): def get_gam_knot_column_names(self): """ - Retrieve gam column names corresponding to the knot locations that will be returned if store_knot_location - parameter is enabled. + Retrieve gam column names corresponding to the knot locations that will be returned if store_knot_locations + parameter is enabled. :return: gam column names whose knot locations are stored in the knot_locations. """ diff --git a/h2o-py/tests/testdir_apis/H2O_Init/h2o.init_test.py b/h2o-py/tests/testdir_apis/H2O_Init/h2o.init_test.py index 544e02c89ca7..8cf2b4858ebd 100644 --- a/h2o-py/tests/testdir_apis/H2O_Init/h2o.init_test.py +++ b/h2o-py/tests/testdir_apis/H2O_Init/h2o.init_test.py @@ -7,6 +7,7 @@ import tempfile import shutil import os +import warnings def h2oinit(): """ @@ -125,12 +126,26 @@ def h2oinit_with_extra_classpath(): finally: h2o.cluster().shutdown() +def h2oinit_bind_to_localhost_false(): + warnings.filterwarnings("error", category=UserWarning) + try: + h2o.init(strict_version_check=False, bind_to_localhost=False, port=44000) + assert False + except UserWarning as warning: + print(str(warning)) + assert "SECURITY_WARNING" in str(warning) + finally: + warnings.resetwarnings() + h2o.connect(port=44000) + h2o.cluster().shutdown() + # None of the tests below need a pre initialized instance h2oinit_default_log_dir() h2oinit_custom_log_dir() h2oinit_fail_invalid_log_level() h2oinitname() h2oinit_with_extra_classpath() +h2oinit_bind_to_localhost_false() if __name__ == "__main__": pyunit_utils.standalone_test(h2oinit) diff --git 
a/h2o-r/h2o-package/R/connection.R b/h2o-r/h2o-package/R/connection.R index f449777653d9..8971ec9d335a 100755 --- a/h2o-r/h2o-package/R/connection.R +++ b/h2o-r/h2o-package/R/connection.R @@ -237,6 +237,17 @@ h2o.init <- function(ip = "localhost", port = 54321, name = NA_character_, start stop("H2O failed to start, stopping execution.") } + + securityWarnings <- "" + if (file.info(stdout)$size > 0) { + securityWarnings <- grep("SECURITY_WARNING", readLines(stdout), value=TRUE) + } + if (length(securityWarnings) > 0) { + msg = paste( + "Server process startup raise a security warning:", + paste(securityWarnings, collapse = "\n"), sep = "\n") + warning(msg) + } } else stop("Can only start H2O launcher if IP address is localhost.") } diff --git a/scripts/jenkins/Makefile.jenkins b/scripts/jenkins/Makefile.jenkins index 3c80d4ddc044..00533ce462c3 100755 --- a/scripts/jenkins/Makefile.jenkins +++ b/scripts/jenkins/Makefile.jenkins @@ -14,6 +14,18 @@ build-h2o-3: exit 1; \ fi +test-logger-initialize-properly: + -java -jar build/h2o.jar -ip 127.0.0.1 -web_ip 10.20.30.40 > out.txt 2>&1 + if [[ $$(grep "ERROR" out.txt | wc -l) == "2" ]]; then \ + rm -f out.txt; \ + echo "All good"; \ + exit 0; \ + else \ + rm -f out.txt; \ + echo "All LOG.error() should be also printed to std.err. 
The out.txt doesn't contain 2 ERRORs, please check the output."; \ + exit 1; \ + fi + warmup-caches: ./gradlew build compileTestJava -x test -x h2o-r:build $$ADDITIONAL_GRADLE_OPTS diff --git a/scripts/jenkins/groovy/defineTestStages.groovy b/scripts/jenkins/groovy/defineTestStages.groovy index c13732b32a23..f02a3667cf43 100644 --- a/scripts/jenkins/groovy/defineTestStages.groovy +++ b/scripts/jenkins/groovy/defineTestStages.groovy @@ -561,6 +561,10 @@ def call(final pipelineContext) { stageName: 'R4.0 Explain', target: 'test-r-explain', rVersion: '4.0.2', timeoutValue: 180, component: pipelineContext.getBuildConfig().COMPONENT_R ], + [ + stageName: 'LOGGER inicialization test', target: 'test-logger-initialize-properly', javaVersion: 8, timeoutValue: 1, + component: pipelineContext.getBuildConfig().COMPONENT_JAVA + ] ] def supportedHadoopDists = pipelineContext.getBuildConfig().getSupportedHadoopDistributions()