From 770db3417831c1cec7843901f4a19867216cd66e Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Wed, 17 Jan 2024 17:59:55 +0100 Subject: [PATCH 01/37] GH-15809: implement AIC and Loglikelihood calculation for GenericModel --- .../main/java/hex/generic/GenericModel.java | 48 +++++++++++++++++++ h2o-core/src/main/java/hex/Model.java | 4 ++ .../genmodel/algos/glm/GlmMojoModelBase.java | 4 ++ 3 files changed, 56 insertions(+) diff --git a/h2o-algos/src/main/java/hex/generic/GenericModel.java b/h2o-algos/src/main/java/hex/generic/GenericModel.java index 24adcb746e60..2bac8ad31737 100644 --- a/h2o-algos/src/main/java/hex/generic/GenericModel.java +++ b/h2o-algos/src/main/java/hex/generic/GenericModel.java @@ -2,12 +2,14 @@ import hex.*; import hex.genmodel.*; +import hex.genmodel.algos.glm.GlmMojoModel; import hex.genmodel.algos.kmeans.KMeansMojoModel; import hex.genmodel.descriptor.ModelDescriptor; import hex.genmodel.descriptor.ModelDescriptorBuilder; import hex.genmodel.easy.EasyPredictModelWrapper; import hex.genmodel.easy.RowData; import hex.genmodel.easy.exception.PredictException; +import hex.glm.GLMModel; import hex.tree.isofor.ModelMetricsAnomaly; import water.*; import water.fvec.*; @@ -78,6 +80,10 @@ private static MojoModel reconstructMojo(ByteVec mojoBytes) { throw new IllegalStateException("Unreachable MOJO file: " + mojoBytes._key, e); } } + private Iced getParamByName(String name) { + return Arrays.stream(this._parms._modelParameters) + .filter(p -> Objects.equals(p.name, name)).findAny().get().actual_value; + } @Override public ModelMetrics.MetricBuilder makeMetricBuilder(String[] domain) { @@ -131,6 +137,48 @@ protected PredictScoreResult predictScoreImpl(Frame fr, Frame adaptFrm, String d return predictScoreMojoImpl(fr, destination_key, j, computeMetrics); } else return super.predictScoreImpl(fr, adaptFrm, destination_key, j, computeMetrics, customMetricFunc); +// return super.predictScoreImpl(fr, adaptFrm, destination_key, j, true, customMetricFunc); + } + + + @Override + public double aic(double likelihood) { + // calculate negative loglikelihood specifically for GLM + if (!_algoName.equals("glm")) { + return 0; + } else { + double aic = -2 * likelihood + 2 * Arrays.stream(((GlmMojoModel) this.genModel()).getBeta()).filter(b -> b != 0).count(); + System.out.println("Bettas for AIC: " + Arrays.stream(((GlmMojoModel) this.genModel()).getBeta()).filter(b -> b != 0).count()); + System.out.println(Arrays.toString(((GlmMojoModel) this.genModel()).getBeta())); + System.out.println("Gen AIC: " + aic); + return aic; + } + } + + @Override + public double likelihood(double w, double y, double[] f) { + // calculate negative loglikelihood specifically for GLM + if (w == 0 || !_algoName.equals("glm")) { + return 0; + } else { + // create GLM parameters instance + GLMModel.GLMParameters glmParameters = new GLMModel.GLMParameters( + GLMModel.GLMParameters.Family.valueOf(getParamByName("family").toString()), + GLMModel.GLMParameters.Link.valueOf(getParamByName("link").toString()), + Arrays.stream(getParamByName("lambda").toString().trim().replaceAll("\\[", "") + .replaceAll("\\]", "").split(",\\s*")) + .mapToDouble(Double::parseDouble).toArray(), + Arrays.stream(getParamByName("alpha").toString().trim().replaceAll("\\[", "") + .replaceAll("\\]", "").split(",\\s*")) + .mapToDouble(Double::parseDouble).toArray(), + Double.parseDouble(getParamByName("tweedie_variance_power").toString()), + Double.parseDouble(getParamByName("tweedie_link_power").toString()), + null, + Double.parseDouble(getParamByName("theta").toString()) + ); + // time-consuming calculation for the final scoring for GLM model + return glmParameters.likelihood(w, y, f); + } } PredictScoreResult predictScoreMojoImpl(Frame fr, String destination_key, Job j, boolean computeMetrics) { diff --git a/h2o-core/src/main/java/hex/Model.java b/h2o-core/src/main/java/hex/Model.java index 1887e9f2948f..d2d2642ec015 100755 --- a/h2o-core/src/main/java/hex/Model.java +++ b/h2o-core/src/main/java/hex/Model.java @@ -1384,6 +1384,10 @@ public double likelihood(double w, double y, double[] f) { return 0.0; // place holder. This function is overridden in GLM. } + public double aic(double likelihood) { + return 0.0; // place holder. This function is overridden in GLM. + } + public ScoringInfo[] scoring_history() { return scoringInfo; } /** diff --git a/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoModelBase.java b/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoModelBase.java index c8d068506b1e..de3f01431bf8 100644 --- a/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoModelBase.java +++ b/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoModelBase.java @@ -23,6 +23,10 @@ abstract class GlmMojoModelBase extends MojoModel { super(columns, domains, responseColumn); } + public double[] getBeta() { + return _beta; + } + void init() { _versionSupportOffset = _mojo_version >= 1.1; } From 434d444bdf163288b09d2c77791b8068b40f4740 Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Wed, 17 Jan 2024 18:06:32 +0100 Subject: [PATCH 02/37] GH-15809: add AIC and Loglikelihood to ModelMetricsBinomial --- .../src/main/java/hex/psvm/MetricBuilderPSVM.java | 2 +- .../isofor/MetricBuilderAnomalySupervised.java | 2 +- .../src/main/java/hex/ModelMetricsBinomial.java | 15 +++++++++++++-- .../main/java/hex/ModelMetricsBinomialGLM.java | 2 +- .../java/hex/ModelMetricsBinomialGeneric.java | 2 +- .../schemas3/ModelMetricsBinomialGenericV3.java | 2 ++ .../api/schemas3/ModelMetricsBinomialV3.java | 6 ++++++ 7 files changed, 25 insertions(+), 6 deletions(-) diff --git a/h2o-algos/src/main/java/hex/psvm/MetricBuilderPSVM.java b/h2o-algos/src/main/java/hex/psvm/MetricBuilderPSVM.java index 25f0513c49c4..58006dac319f 100644 --- a/h2o-algos/src/main/java/hex/psvm/MetricBuilderPSVM.java +++ b/h2o-algos/src/main/java/hex/psvm/MetricBuilderPSVM.java @@ -76,7 +76,7 @@ public ModelMetrics makeModelMetrics(Model m, Frame f, Frame frameWithWeights, F } else { auc = AUC2.emptyAUC(); } - ModelMetricsBinomial mm = new ModelMetricsBinomial(m, f, _count, mse, _domain, sigma, auc, Double.NaN, null, _customMetric); + ModelMetricsBinomial mm = new ModelMetricsBinomial(m, f, _count, mse, _domain, sigma, auc, Double.NaN, Double.NaN, Double.NaN, null, _customMetric); if (m != null) m.addModelMetrics(mm); return mm; } diff --git a/h2o-algos/src/main/java/hex/tree/isofor/MetricBuilderAnomalySupervised.java b/h2o-algos/src/main/java/hex/tree/isofor/MetricBuilderAnomalySupervised.java index 8cd3b0a24e37..c10e4e5ed841 100644 --- a/h2o-algos/src/main/java/hex/tree/isofor/MetricBuilderAnomalySupervised.java +++ b/h2o-algos/src/main/java/hex/tree/isofor/MetricBuilderAnomalySupervised.java @@ -35,7 +35,7 @@ public MetricBuilderAnomalySupervised(String[] domain) { auc = AUC2.emptyAUC(); } ModelMetricsBinomial mm = new ModelMetricsBinomial(m, f, _count, mse, _domain, - sigma, auc, logloss, null, _customMetric); + sigma, auc, logloss, 0, 0, null, _customMetric); if (m != null) { m.addModelMetrics(mm); } diff --git a/h2o-core/src/main/java/hex/ModelMetricsBinomial.java b/h2o-core/src/main/java/hex/ModelMetricsBinomial.java index 04a40a3f5ace..d69d70eb069d 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsBinomial.java +++ b/h2o-core/src/main/java/hex/ModelMetricsBinomial.java @@ -19,15 +19,19 @@ public class ModelMetricsBinomial extends ModelMetricsSupervised { public final AUC2 _auc; public final double _logloss; + public final double _loglikelihood; + public final double _aic; public double _mean_per_class_error; public final GainsLift _gainsLift; public ModelMetricsBinomial(Model model, Frame frame, long nobs, double mse, String[] domain, - double sigma, AUC2 auc, double logloss, GainsLift gainsLift, + double sigma, AUC2 auc, double logloss, double loglikelihood, double aic, GainsLift gainsLift, CustomMetric customMetric) { super(model, frame, nobs, mse, domain, sigma, customMetric); _auc = auc; _logloss = logloss; + _loglikelihood = loglikelihood; + _aic = aic; _gainsLift = gainsLift; _mean_per_class_error = cm() == null ? Double.NaN : cm().mean_per_class_error(); } @@ -49,6 +53,8 @@ public String toString() { sb.append(" pr_auc: " + (float)_auc.pr_auc() + "\n"); } sb.append(" logloss: " + (float)_logloss + "\n"); + sb.append(" loglikelihood: " + (float)_loglikelihood + "\n"); + sb.append(" AIC: " + (float)_aic + "\n"); sb.append(" mean_per_class_error: " + (float)_mean_per_class_error + "\n"); sb.append(" default threshold: " + (_auc == null ? 0.5 : (float)_auc.defaultThreshold()) + "\n"); if (cm() != null) sb.append(" CM: " + cm().toASCII()); @@ -57,6 +63,8 @@ public String toString() { } public double logloss() { return _logloss; } + public double loglikelihood() { return _loglikelihood; } + public double aic() { return _aic; } public double mean_per_class_error() { return _mean_per_class_error; } @Override public AUC2 auc_obj() { return _auc; } @Override public ConfusionMatrix cm() { @@ -161,6 +169,7 @@ private static class BinomialMetrics extends MRTask { public static class MetricBuilderBinomial> extends MetricBuilderSupervised { protected double _logloss; + protected double _loglikelihood; protected AUC2.AUCBuilder _auc; public MetricBuilderBinomial( String[] domain ) { super(2,domain); _auc = new AUC2.AUCBuilder(AUC2.NBINS); } @@ -256,6 +265,8 @@ private ModelMetrics makeModelMetrics(final Model m, final Frame f, final Frame private ModelMetrics makeModelMetrics(Model m, Frame f, GainsLift gl) { double mse = Double.NaN; + double loglikelihood = Double.NaN; + double aic = Double.NaN; double logloss = Double.NaN; double sigma = Double.NaN; final AUC2 auc; @@ -267,7 +278,7 @@ private ModelMetrics makeModelMetrics(Model m, Frame f, GainsLift gl) { } else { auc = new AUC2(); } - ModelMetricsBinomial mm = new ModelMetricsBinomial(m, f, _count, mse, _domain, sigma, auc, logloss, gl, _customMetric); + ModelMetricsBinomial mm = new ModelMetricsBinomial(m, f, _count, mse, _domain, sigma, auc, logloss, loglikelihood, aic, gl, _customMetric); if (m!=null) m.addModelMetrics(mm); return mm; } diff --git a/h2o-core/src/main/java/hex/ModelMetricsBinomialGLM.java b/h2o-core/src/main/java/hex/ModelMetricsBinomialGLM.java index 2fbcaa13c0b6..e47e3eb33fca 100644 --- a/h2o-core/src/main/java/hex/ModelMetricsBinomialGLM.java +++ b/h2o-core/src/main/java/hex/ModelMetricsBinomialGLM.java @@ -14,7 +14,7 @@ public ModelMetricsBinomialGLM(Model model, Frame frame, long nobs, double mse, double sigma, AUC2 auc, double logloss, double resDev, double nullDev, double aic, long nDof, long rDof, GainsLift gainsLift, CustomMetric customMetric, double loglikelihood) { - super(model, frame, nobs, mse, domain, sigma, auc, logloss, gainsLift, customMetric); + super(model, frame, nobs, mse, domain, sigma, auc, logloss, loglikelihood, aic, gainsLift, customMetric); _resDev = resDev; _nullDev = nullDev; _AIC = aic; diff --git a/h2o-core/src/main/java/hex/ModelMetricsBinomialGeneric.java b/h2o-core/src/main/java/hex/ModelMetricsBinomialGeneric.java index 462372adc04a..009526c07eec 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsBinomialGeneric.java +++ b/h2o-core/src/main/java/hex/ModelMetricsBinomialGeneric.java @@ -16,7 +16,7 @@ public ModelMetricsBinomialGeneric(Model model, Frame frame, long nobs, double m CustomMetric customMetric, double mean_per_class_error, TwoDimTable thresholds_and_metric_scores, TwoDimTable max_criteria_and_metric_scores, TwoDimTable confusion_matrix, double r2, final String description) { - super(model, frame, nobs, mse, domain, sigma, auc, logloss, null, customMetric); + super(model, frame, nobs, mse, domain, sigma, auc, logloss, 0, 0, null, customMetric); _gainsLiftTable = gainsLiftTable; _thresholds_and_metric_scores = thresholds_and_metric_scores; _max_criteria_and_metric_scores = max_criteria_and_metric_scores; diff --git a/h2o-core/src/main/java/water/api/schemas3/ModelMetricsBinomialGenericV3.java b/h2o-core/src/main/java/water/api/schemas3/ModelMetricsBinomialGenericV3.java index 5e04418baaba..f774bf82a5e5 100644 --- a/h2o-core/src/main/java/water/api/schemas3/ModelMetricsBinomialGenericV3.java +++ b/h2o-core/src/main/java/water/api/schemas3/ModelMetricsBinomialGenericV3.java @@ -10,6 +10,8 @@ public S fillFromImpl(ModelMetricsBinomialGeneric modelMetrics) { super.fillFromImpl(modelMetrics); r2 = modelMetrics.r2(); logloss = modelMetrics._logloss; + loglikelihood = modelMetrics._loglikelihood; + AIC = modelMetrics._aic; if (modelMetrics != null && modelMetrics._confusion_matrix != null) { final ConfusionMatrixV3 convertedConfusionMatrix = new ConfusionMatrixV3(); diff --git a/h2o-core/src/main/java/water/api/schemas3/ModelMetricsBinomialV3.java b/h2o-core/src/main/java/water/api/schemas3/ModelMetricsBinomialV3.java index c1e14a58b14e..f81d5bb97a8a 100644 --- a/h2o-core/src/main/java/water/api/schemas3/ModelMetricsBinomialV3.java +++ b/h2o-core/src/main/java/water/api/schemas3/ModelMetricsBinomialV3.java @@ -18,6 +18,12 @@ public class ModelMetricsBinomialV3 Date: Wed, 17 Jan 2024 18:07:25 +0100 Subject: [PATCH 03/37] GH-15809: implement AIC and Loglikelihood calculations for ModelMetricsBinomial --- h2o-core/src/main/java/hex/ModelMetricsBinomial.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/h2o-core/src/main/java/hex/ModelMetricsBinomial.java b/h2o-core/src/main/java/hex/ModelMetricsBinomial.java index d69d70eb069d..d28f412adc06 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsBinomial.java +++ b/h2o-core/src/main/java/hex/ModelMetricsBinomial.java @@ -206,6 +206,13 @@ public static class MetricBuilderBinomial> ex // Compute log loss _logloss += w * MathUtils.logloss(err); } + + if(m.getClass().toString().contains("Generic")) { + _loglikelihood += m.likelihood(w, yact[0], ds); + System.out.println("_logloss: " + _logloss); + System.out.println("_loglikelihood: " + _loglikelihood); + } + _count++; _wcount += w; assert !Double.isNaN(_sumsqe); @@ -216,6 +223,7 @@ public static class MetricBuilderBinomial> ex @Override public void reduce( T mb ) { super.reduce(mb); // sumseq, count _logloss += mb._logloss; + _loglikelihood += mb._loglikelihood; _auc.reduce(mb._auc); } @@ -274,6 +282,10 @@ private ModelMetrics makeModelMetrics(Model m, Frame f, GainsLift gl) { sigma = weightedSigma(); mse = _sumsqe / _wcount; logloss = _logloss / _wcount; + if(m.getClass().toString().contains("Generic")) { + loglikelihood = -1 * _loglikelihood ; // get likelihood from negative loglikelihood + aic = m.aic(loglikelihood); + } auc = new AUC2(_auc); } else { auc = new AUC2(); From a2e84eb24dd413705783737beddf9e827dd97ab1 Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Wed, 17 Jan 2024 18:07:58 +0100 Subject: [PATCH 04/37] GH-15809: add AIC and Loglikelihood to output metrics --- .../main/java/water/api/schemas3/ModelMetricsBinomialV3.java | 2 ++ h2o-py/h2o/model/metrics_base.py | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/h2o-core/src/main/java/water/api/schemas3/ModelMetricsBinomialV3.java b/h2o-core/src/main/java/water/api/schemas3/ModelMetricsBinomialV3.java index f81d5bb97a8a..13e925d44f3a 100644 --- a/h2o-core/src/main/java/water/api/schemas3/ModelMetricsBinomialV3.java +++ b/h2o-core/src/main/java/water/api/schemas3/ModelMetricsBinomialV3.java @@ -57,6 +57,8 @@ public S fillFromImpl(ModelMetricsBinomial modelMetrics) { // sigma = modelMetrics._sigma; r2 = modelMetrics.r2(); logloss = modelMetrics._logloss; + loglikelihood = modelMetrics._loglikelihood; + AIC = modelMetrics._aic; mean_per_class_error = modelMetrics._mean_per_class_error; diff --git a/h2o-py/h2o/model/metrics_base.py b/h2o-py/h2o/model/metrics_base.py index 63d5e2d240a2..b303e6b015fb 100644 --- a/h2o-py/h2o/model/metrics_base.py +++ b/h2o-py/h2o/model/metrics_base.py @@ -151,7 +151,10 @@ def _str_items(self, verbosity=None): "Null deviance: {}".format(self.null_deviance()), "Residual deviance: {}".format(self.residual_deviance()), ]) - if is_type(self.aic(), numeric): items.append("AIC: {}".format(self.aic())) + if is_type(self.aic(), numeric) and self.loglikelihood() != 0: + items.append("AIC: {}".format(self.aic())) + if is_type(self.loglikelihood(), numeric) and self.loglikelihood() != 0: + items.append("Loglikelihood: {}".format(self.loglikelihood())) items.extend(self._str_items_custom()) return items From d5dcae1b0592a90826121f376a5271da3b30837b Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Wed, 17 Jan 2024 18:08:32 +0100 Subject: [PATCH 05/37] GH-15809: update test to check AIC and Loglikelihood calculation for loaded model --- .../pyunit_generic_model_mojo_glm.py | 57 +++++++++++++------ 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py b/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py index 258ffa556f1d..f571ee9bf45b 100644 --- a/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py +++ b/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py @@ -1,3 +1,5 @@ +import math + import tempfile import os import sys @@ -30,7 +32,10 @@ def test(x, y, output_test, strip_part, algo_name, generic_algo_name, family): compare_params(glm, generic_mojo_model_from_file) output_test(original_output.getvalue(), generic_output.getvalue(), strip_part, algo_name, generic_algo_name) - predictions = generic_mojo_model_from_file.predict(airlines) + + airlines_metrics_dataset = h2o.import_file(path=pyunit_utils.locate("smalldata/testng/airlines_train.csv")) + predictions = generic_mojo_model_from_file.predict(airlines_metrics_dataset) + metrics = generic_mojo_model_from_file.model_performance(airlines_metrics_dataset) # just loglikelihood (multiplied -1) assert predictions is not None assert predictions.nrows == 24421 assert generic_mojo_model_from_file._model_json["output"]["model_summary"] is not None @@ -38,35 +43,51 @@ def test(x, y, output_test, strip_part, algo_name, generic_algo_name, family): assert generic_mojo_model_from_file._model_json["output"]["variable_importances"] is not None assert len(generic_mojo_model_from_file._model_json["output"]["variable_importances"]._cell_values) > 0 + print(generic_mojo_model_from_file._model_json["output"]["training_metrics"]) generic_mojo_filename = tempfile.mkdtemp("zip", "genericMojo"); generic_mojo_filename = generic_mojo_model_from_file.download_mojo(path=generic_mojo_filename) assert os.path.getsize(generic_mojo_filename) == os.path.getsize(original_model_filename) + glm_calc_like = H2OGeneralizedLinearEstimator(nfolds=2, family=family, max_iterations=2, calc_like=True) + glm_calc_like.train(x=x, y=y, training_frame=airlines, validation_frame=airlines) + + print("glm training metrics:") + print(glm._model_json["output"]["training_metrics"]) + print("glm calc like training metrics:") + print(glm_calc_like._model_json["output"]["training_metrics"]) + print("metrics:") + print(metrics) + + assert math.isclose(glm_calc_like._model_json["output"]["training_metrics"]._metric_json["AIC"], + metrics._metric_json["AIC"], rel_tol=1e-3), "The numbers are not close enough." + assert math.isclose(-glm_calc_like._model_json["output"]["training_metrics"]._metric_json["loglikelihood"], + metrics._metric_json["loglikelihood"], rel_tol=1e-3), "The numbers are not close enough." + def mojo_model_test_binomial(): test(["Origin", "Dest"], "IsDepDelayed", compare_output, 'GLM Model: summary', 'ModelMetricsBinomialGLM: glm', 'ModelMetricsBinomialGLMGeneric: generic', 'binomial') -def mojo_model_test_regression(): - test(["Origin", "Dest"], "Distance", compare_output, 'GLM Model: summary', 'ModelMetricsRegressionGLM: glm', - 'ModelMetricsRegressionGLMGeneric: generic', 'gaussian') +# def mojo_model_test_regression(): +# test(["Origin", "Dest"], "Distance", compare_output, 'GLM Model: summary', 'ModelMetricsRegressionGLM: glm', +# 'ModelMetricsRegressionGLMGeneric: generic', 'gaussian') +# +# +# def mojo_model_test_multinomial(): +# test(["Origin", "Distance"], "Dest", compare_output, 'GLM Model: summary', 'ModelMetricsMultinomialGLM: glm', +# 'ModelMetricsMultinomialGLMGeneric: generic', 'multinomial') +# +# +# def mojo_model_test_ordinal(): +# test(["Origin", "Distance", "IsDepDelayed"], "fDayOfWeek", compare_output, 'GLM Model: summary', +# 'ModelMetricsOrdinalGLM: glm', +# 'ModelMetricsOrdinalGLMGeneric: generic', 'ordinal') -def mojo_model_test_multinomial(): - test(["Origin", "Distance"], "Dest", compare_output, 'GLM Model: summary', 'ModelMetricsMultinomialGLM: glm', - 'ModelMetricsMultinomialGLMGeneric: generic', 'multinomial') - - -def mojo_model_test_ordinal(): - test(["Origin", "Distance", "IsDepDelayed"], "fDayOfWeek", compare_output, 'GLM Model: summary', - 'ModelMetricsOrdinalGLM: glm', - 'ModelMetricsOrdinalGLMGeneric: generic', 'ordinal') - - pyunit_utils.run_tests([ mojo_model_test_binomial, - mojo_model_test_multinomial, - mojo_model_test_regression, - mojo_model_test_ordinal + # mojo_model_test_multinomial, + # mojo_model_test_regression, + # mojo_model_test_ordinal ]) From 7b4d93f72ae15a4238d9fa1951a47a8dc4d04a83 Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Mon, 22 Jan 2024 14:08:32 +0100 Subject: [PATCH 06/37] GH-15809: correct betas source --- h2o-algos/src/main/java/hex/generic/GenericModel.java | 9 +++------ .../java/hex/genmodel/algos/glm/GlmMojoModelBase.java | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/h2o-algos/src/main/java/hex/generic/GenericModel.java b/h2o-algos/src/main/java/hex/generic/GenericModel.java index 2bac8ad31737..cf0841a625a1 100644 --- a/h2o-algos/src/main/java/hex/generic/GenericModel.java +++ b/h2o-algos/src/main/java/hex/generic/GenericModel.java @@ -2,7 +2,7 @@ import hex.*; import hex.genmodel.*; -import hex.genmodel.algos.glm.GlmMojoModel; +import hex.genmodel.algos.glm.GlmMojoModelBase; import hex.genmodel.algos.kmeans.KMeansMojoModel; import hex.genmodel.descriptor.ModelDescriptor; import hex.genmodel.descriptor.ModelDescriptorBuilder; @@ -147,11 +147,8 @@ public double aic(double likelihood) { if (!_algoName.equals("glm")) { return 0; } else { - double aic = -2 * likelihood + 2 * Arrays.stream(((GlmMojoModel) this.genModel()).getBeta()).filter(b -> b != 0).count(); - System.out.println("Bettas for AIC: " + Arrays.stream(((GlmMojoModel) this.genModel()).getBeta()).filter(b -> b != 0).count()); - System.out.println(Arrays.toString(((GlmMojoModel) this.genModel()).getBeta())); - System.out.println("Gen AIC: " + aic); - return aic; + long betasCount = Arrays.stream(((GlmMojoModelBase) this.genModel()).getBeta()).filter(b -> b != 0).count(); + return -2 * likelihood + 2 * betasCount; } } diff --git a/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoModelBase.java b/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoModelBase.java index de3f01431bf8..69cd843e5890 100644 --- a/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoModelBase.java +++ b/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoModelBase.java @@ -2,7 +2,7 @@ import hex.genmodel.MojoModel; -abstract class GlmMojoModelBase extends MojoModel { +public abstract class GlmMojoModelBase extends MojoModel { boolean _useAllFactorLevels; From c20c2d542dd85f6c18b57a653d6d7c7c785b4e08 Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Mon, 22 Jan 2024 14:09:35 +0100 Subject: [PATCH 07/37] GH-15809: implement AIC and loglikelihood calculation for multinomial generic glm --- .../java/hex/ModelMetricsBinomialGLM.java | 2 +- .../java/hex/ModelMetricsMultinomial.java | 29 +++++++++++++++++-- .../hex/ModelMetricsMultinomialGeneric.java | 2 +- .../ModelMetricsMultinomialGenericV3.java | 3 ++ .../schemas3/ModelMetricsMultinomialV3.java | 9 ++++++ 5 files changed, 41 insertions(+), 4 deletions(-) diff --git a/h2o-core/src/main/java/hex/ModelMetricsBinomialGLM.java b/h2o-core/src/main/java/hex/ModelMetricsBinomialGLM.java index e47e3eb33fca..501b54f04dcf 100644 --- a/h2o-core/src/main/java/hex/ModelMetricsBinomialGLM.java +++ b/h2o-core/src/main/java/hex/ModelMetricsBinomialGLM.java @@ -70,7 +70,7 @@ public ModelMetricsMultinomialGLM(Model model, Frame frame, long nobs, double ms double sigma, ConfusionMatrix cm, float [] hr, double logloss, double resDev, double nullDev, double aic, long nDof, long rDof, MultinomialAUC auc, CustomMetric customMetric, double loglikelihood) { - super(model, frame, nobs, mse, domain, sigma, cm, hr, logloss, auc, customMetric); + super(model, frame, nobs, mse, domain, sigma, cm, hr, logloss, loglikelihood, aic, auc, customMetric); _resDev = resDev; _nullDev = nullDev; _AIC = aic; diff --git a/h2o-core/src/main/java/hex/ModelMetricsMultinomial.java b/h2o-core/src/main/java/hex/ModelMetricsMultinomial.java index 7c47205d291f..b991d01e3140 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsMultinomial.java +++ b/h2o-core/src/main/java/hex/ModelMetricsMultinomial.java @@ -18,14 +18,20 @@ public class ModelMetricsMultinomial extends ModelMetricsSupervised { public final float[] _hit_ratios; // Hit ratios public final ConfusionMatrix _cm; public final double _logloss; + public final double _loglikelihood; + public final double _aic; public double _mean_per_class_error; public MultinomialAUC _auc; - public ModelMetricsMultinomial(Model model, Frame frame, long nobs, double mse, String[] domain, double sigma, ConfusionMatrix cm, float[] hr, double logloss, MultinomialAUC auc, CustomMetric customMetric) { + public ModelMetricsMultinomial(Model model, Frame frame, long nobs, double mse, String[] domain, double sigma, + ConfusionMatrix cm, float[] hr, double logloss, double loglikelihood, double aic, + MultinomialAUC auc, CustomMetric customMetric) { super(model, frame, nobs, mse, domain, sigma, customMetric); _cm = cm; _hit_ratios = hr; _logloss = logloss; + _loglikelihood = loglikelihood; + _aic = aic; _mean_per_class_error = cm==null || cm.tooLarge() ? Double.NaN : cm.mean_per_class_error(); _auc = auc; } @@ -35,6 +41,8 @@ public String toString() { StringBuilder sb = new StringBuilder(); sb.append(super.toString()); sb.append(" logloss: " + (float)_logloss + "\n"); + sb.append(" loglikelihood: " + (float)_loglikelihood + "\n"); + sb.append(" AIC: " + (float)_aic + "\n"); sb.append(" mean_per_class_error: " + (float)_mean_per_class_error + "\n"); sb.append(" hit ratios: " + Arrays.toString(_hit_ratios) + "\n"); sb.append(" AUC: "+auc()+ "\n"); @@ -59,6 +67,8 @@ public String toString() { } public double logloss() { return _logloss; } + public double loglikelihood() { return _loglikelihood; } + public double aic() { return _aic; } public double mean_per_class_error() { return _mean_per_class_error; } @Override public ConfusionMatrix cm() { return _cm; } @Override public float[] hr() { return _hit_ratios; } @@ -235,6 +245,7 @@ public static class MetricBuilderMultinomial Date: Mon, 22 Jan 2024 14:09:57 +0100 Subject: [PATCH 08/37] GH-15809: minor aic retrieval fix --- h2o-py/h2o/model/metrics_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/h2o-py/h2o/model/metrics_base.py b/h2o-py/h2o/model/metrics_base.py index b303e6b015fb..203ba4099be0 100644 --- a/h2o-py/h2o/model/metrics_base.py +++ b/h2o-py/h2o/model/metrics_base.py @@ -151,7 +151,7 @@ def _str_items(self, verbosity=None): "Null deviance: {}".format(self.null_deviance()), "Residual deviance: {}".format(self.residual_deviance()), ]) - if is_type(self.aic(), numeric) and self.loglikelihood() != 0: + if is_type(self.aic(), numeric) and self.aic() != 0: items.append("AIC: {}".format(self.aic())) if is_type(self.loglikelihood(), numeric) and self.loglikelihood() != 0: items.append("Loglikelihood: {}".format(self.loglikelihood())) From 50fb79999bf829d5d8b16bb220c757b92ac54922 Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Fri, 26 Jan 2024 17:04:01 +0100 Subject: [PATCH 09/37] GH-15809: enable loglikelihood and AIC calculation for multinomial family --- .../main/java/hex/ModelMetricsRegression.java | 26 +++++++++++++++++-- .../java/hex/ModelMetricsRegressionCoxPH.java | 2 +- .../java/hex/ModelMetricsRegressionGLM.java | 7 ++--- .../hex/ModelMetricsRegressionGeneric.java | 2 +- .../ModelMetricsRegressionGenericV3.java | 9 +++++++ .../schemas3/ModelMetricsRegressionV3.java | 9 +++++++ 6 files changed, 46 insertions(+), 9 deletions(-) diff --git a/h2o-core/src/main/java/hex/ModelMetricsRegression.java b/h2o-core/src/main/java/hex/ModelMetricsRegression.java index ef6cab4c1f96..ce7b2956c136 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsRegression.java +++ b/h2o-core/src/main/java/hex/ModelMetricsRegression.java @@ -13,21 +13,28 @@ public class ModelMetricsRegression extends ModelMetricsSupervised { public final double _mean_residual_deviance; + public final double _AIC; + public final double _loglikelihood; /** * @return {@link #mean_residual_deviance()} for all algos except GLM, for which it means "total residual deviance". **/ public double residual_deviance() { return _mean_residual_deviance; } + public double loglikelihood() { return _loglikelihood; } + public double aic() { return _AIC; } @SuppressWarnings("unused") public double mean_residual_deviance() { return _mean_residual_deviance; } public final double _mean_absolute_error; public double mae() { return _mean_absolute_error; } public final double _root_mean_squared_log_error; public double rmsle() { return _root_mean_squared_log_error; } - public ModelMetricsRegression(Model model, Frame frame, long nobs, double mse, double sigma, double mae,double rmsle, double meanResidualDeviance, CustomMetric customMetric) { + public ModelMetricsRegression(Model model, Frame frame, long nobs, double mse, double sigma, double mae,double rmsle, + double meanResidualDeviance, CustomMetric customMetric, double loglikelihood, double aic) { super(model, frame, nobs, mse, null, sigma, customMetric); _mean_residual_deviance = meanResidualDeviance; _mean_absolute_error = mae; _root_mean_squared_log_error = rmsle; + _loglikelihood = loglikelihood; + _AIC = aic; } public static ModelMetricsRegression getFromDKV(Model model, Frame frame) { @@ -51,6 +58,8 @@ public String toString() { } sb.append(" mean absolute error: " + (float)_mean_absolute_error + "\n"); sb.append(" root mean squared log error: " + (float)_root_mean_squared_log_error + "\n"); + sb.append(" loglikelihood: " + (float)_loglikelihood + "\n"); + sb.append(" AIC: " + (float)_AIC + "\n"); return sb.toString(); } @@ -117,6 +126,7 @@ public static class MetricBuilderRegression Distribution _dist; double _abserror; double _rmslerror; + protected double _loglikelihood; public MetricBuilderRegression() { super(1,null); //this will make _work = new float[2]; } @@ -147,6 +157,10 @@ public MetricBuilderRegression(Distribution dist) { _sumdeviance += _dist.deviance(w, yact[0], ds[0]); } } + + if(m.getClass().toString().contains("Generic")) { + _loglikelihood += m.likelihood(w, yact[0], ds); + } _count++; _wcount += w; @@ -160,6 +174,7 @@ public MetricBuilderRegression(Distribution dist) { _sumdeviance += mb._sumdeviance; _abserror += mb._abserror; _rmslerror += mb._rmslerror; + _loglikelihood += mb._loglikelihood; } // Having computed a MetricBuilder, this method fills in a ModelMetrics @@ -173,6 +188,8 @@ ModelMetricsRegression computeModelMetrics(Model m, Frame f, Frame adaptedFrame, double mse = _sumsqe / _wcount; double mae = _abserror/_wcount; //Mean Absolute Error double rmsle = Math.sqrt(_rmslerror/_wcount); //Root Mean Squared Log Error + double loglikelihood = Double.NaN; + double aic = Double.NaN; if (adaptedFrame ==null) adaptedFrame = f; double meanResDeviance = 0; if (m != null && m.isDistributionHuber()){ @@ -195,7 +212,12 @@ ModelMetricsRegression computeModelMetrics(Model m, Frame f, Frame adaptedFrame, } else { meanResDeviance = Double.NaN; } - ModelMetricsRegression mm = new ModelMetricsRegression(m, f, _count, mse, weightedSigma(), mae, rmsle, meanResDeviance, _customMetric); + if(m.getClass().toString().contains("Generic")) { + loglikelihood = -1 * _loglikelihood ; // get likelihood from negative loglikelihood + aic = m.aic(loglikelihood); + } + ModelMetricsRegression mm = new ModelMetricsRegression(m, f, _count, mse, weightedSigma(), mae, rmsle, + meanResDeviance, _customMetric, loglikelihood, aic); return mm; } } diff --git a/h2o-core/src/main/java/hex/ModelMetricsRegressionCoxPH.java b/h2o-core/src/main/java/hex/ModelMetricsRegressionCoxPH.java index 28a1dfa0b949..03e7cd0cf608 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsRegressionCoxPH.java +++ b/h2o-core/src/main/java/hex/ModelMetricsRegressionCoxPH.java @@ -34,7 +34,7 @@ public class ModelMetricsRegressionCoxPH extends ModelMetricsRegression { public ModelMetricsRegressionCoxPH(Model model, Frame frame, long nobs, double mse, double sigma, double mae, double rmsle, double meanResidualDeviance, CustomMetric customMetric, double concordance, long concordant, long discordant, long tied_y) { - super(model, frame, nobs, mse, sigma, mae, rmsle, meanResidualDeviance, customMetric); + super(model, frame, nobs, mse, sigma, mae, rmsle, meanResidualDeviance, customMetric, 0, 0); this._concordance = concordance; this._concordant = concordant; diff --git a/h2o-core/src/main/java/hex/ModelMetricsRegressionGLM.java b/h2o-core/src/main/java/hex/ModelMetricsRegressionGLM.java index de12cb321c32..39401e6c7fc8 100644 --- a/h2o-core/src/main/java/hex/ModelMetricsRegressionGLM.java +++ b/h2o-core/src/main/java/hex/ModelMetricsRegressionGLM.java @@ -10,20 +10,17 @@ public class ModelMetricsRegressionGLM extends ModelMetricsRegression implements public final long _residualDegressOfFreedom; public final double _resDev; public final double _nullDev; - public final double _AIC; - public final double _loglikelihood; + public ModelMetricsRegressionGLM(Model model, Frame frame, long nobs, double mse, double sigma, double mae, double rmsle, double resDev, double meanResDev, double nullDev, double aic, long nDof, long rDof, CustomMetric customMetric, double loglikelihood) { - super(model, frame, nobs, mse, sigma, mae, rmsle, meanResDev, customMetric); + super(model, frame, nobs, mse, sigma, mae, rmsle, meanResDev, customMetric, loglikelihood, aic); _resDev = resDev; _nullDev = nullDev; - _AIC = aic; _nullDegressOfFreedom = nDof; _residualDegressOfFreedom = rDof; - _loglikelihood = loglikelihood; } @Override diff --git a/h2o-core/src/main/java/hex/ModelMetricsRegressionGeneric.java b/h2o-core/src/main/java/hex/ModelMetricsRegressionGeneric.java index 6715df012a75..8add2c06fb8e 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsRegressionGeneric.java +++ b/h2o-core/src/main/java/hex/ModelMetricsRegressionGeneric.java @@ -6,7 +6,7 @@ public class ModelMetricsRegressionGeneric extends ModelMetricsRegression { public ModelMetricsRegressionGeneric(Model model, Frame frame, long nobs, double mse, double sigma, double mae, double rmsle, double meanResidualDeviance, CustomMetric customMetric, String description) { - super(model, frame, nobs, mse, sigma, mae, rmsle, meanResidualDeviance, customMetric); + super(model, frame, nobs, mse, sigma, mae, rmsle, meanResidualDeviance, customMetric, 0, 0); _description = description; } } diff --git a/h2o-core/src/main/java/water/api/schemas3/ModelMetricsRegressionGenericV3.java b/h2o-core/src/main/java/water/api/schemas3/ModelMetricsRegressionGenericV3.java index 2b749922dcf1..da825d5c9765 100644 --- a/h2o-core/src/main/java/water/api/schemas3/ModelMetricsRegressionGenericV3.java +++ b/h2o-core/src/main/java/water/api/schemas3/ModelMetricsRegressionGenericV3.java @@ -14,12 +14,21 @@ public class ModelMetricsRegressionGenericV3 Date: Fri, 26 Jan 2024 17:05:30 +0100 Subject: [PATCH 10/37] GH-15809: remove prints --- h2o-core/src/main/java/hex/ModelMetricsBinomial.java | 2 -- h2o-core/src/main/java/hex/ModelMetricsMultinomial.java | 2 -- 2 files changed, 4 deletions(-) diff --git a/h2o-core/src/main/java/hex/ModelMetricsBinomial.java b/h2o-core/src/main/java/hex/ModelMetricsBinomial.java index d28f412adc06..206d0fd4e27e 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsBinomial.java +++ b/h2o-core/src/main/java/hex/ModelMetricsBinomial.java @@ -209,8 +209,6 @@ public static class MetricBuilderBinomial> ex if(m.getClass().toString().contains("Generic")) { _loglikelihood += m.likelihood(w, yact[0], ds); - System.out.println("_logloss: " + _logloss); - System.out.println("_loglikelihood: " + _loglikelihood); } _count++; diff --git a/h2o-core/src/main/java/hex/ModelMetricsMultinomial.java b/h2o-core/src/main/java/hex/ModelMetricsMultinomial.java index b991d01e3140..ab6fba09b45a 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsMultinomial.java +++ b/h2o-core/src/main/java/hex/ModelMetricsMultinomial.java @@ -317,8 +317,6 @@ public MetricBuilderMultinomial( int nclasses, String[] domain, MultinomialAucTy if(m.getClass().toString().contains("Generic")) { _loglikelihood += m.likelihood(w, yact[0], ds); - System.out.println("_logloss: " + _logloss); - System.out.println("_loglikelihood: " + _loglikelihood); } return ds; // Flow coding } From 749dd439a44d8473ed613fed30712ef145ab102a Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Fri, 26 Jan 2024 17:06:03 +0100 Subject: [PATCH 11/37] GH-15809: refactor --- .../main/java/hex/generic/GenericModel.java | 46 +++++++++++-------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/h2o-algos/src/main/java/hex/generic/GenericModel.java b/h2o-algos/src/main/java/hex/generic/GenericModel.java index cf0841a625a1..feaaf4836422 100644 --- a/h2o-algos/src/main/java/hex/generic/GenericModel.java +++ b/h2o-algos/src/main/java/hex/generic/GenericModel.java @@ -44,6 +44,7 @@ public class GenericModel extends Model _genModelSource; + private GLMModel.GLMParameters _glmParameters; /** * Full constructor @@ -58,6 +59,26 @@ public GenericModel(Key selfKey, GenericModelParameters parms, Gen if (mojoModel._modelAttributes != null && mojoModel._modelAttributes.getModelParameters() != null) { _parms._modelParameters = GenericModelParameters.convertParameters(mojoModel._modelAttributes.getModelParameters()); } + _glmParameters = null; + if(_algoName.toLowerCase().contains("glm")) { + GlmMojoModelBase glmModel = (GlmMojoModelBase) mojoModel; + // create GLM parameters instance + _glmParameters = new GLMModel.GLMParameters( + GLMModel.GLMParameters.Family.valueOf(getParamByName("family").toString()), + GLMModel.GLMParameters.Link.valueOf(getParamByName("link").toString()), + Arrays.stream(getParamByName("lambda").toString().trim().replaceAll("\\[", "") + .replaceAll("\\]", "").split(",\\s*")) + .mapToDouble(Double::parseDouble).toArray(), + Arrays.stream(getParamByName("alpha").toString().trim().replaceAll("\\[", "") + .replaceAll("\\]", "").split(",\\s*")) + .mapToDouble(Double::parseDouble).toArray(), + Double.parseDouble(getParamByName("tweedie_variance_power").toString()), + Double.parseDouble(getParamByName("tweedie_link_power").toString()), + null, + Double.parseDouble(getParamByName("theta").toString()), + glmModel.getDispersionEstimated() + ); + } } public GenericModel(Key selfKey, GenericModelParameters parms, GenericModelOutput output, @@ -80,10 +101,6 @@ private static MojoModel reconstructMojo(ByteVec mojoBytes) { throw new IllegalStateException("Unreachable MOJO file: " + mojoBytes._key, e); } } - private Iced getParamByName(String name) { - return Arrays.stream(this._parms._modelParameters) - .filter(p -> Objects.equals(p.name, name)).findAny().get().actual_value; - } @Override public ModelMetrics.MetricBuilder makeMetricBuilder(String[] domain) { @@ -140,6 +157,10 @@ protected PredictScoreResult predictScoreImpl(Frame fr, Frame adaptFrm, String d // return super.predictScoreImpl(fr, adaptFrm, destination_key, j, true, customMetricFunc); } + private Iced getParamByName(String name) { + return Arrays.stream(this._parms._modelParameters) + .filter(p -> Objects.equals(p.name, name)).findAny().get().actual_value; + } @Override public double aic(double likelihood) { @@ -158,23 +179,8 @@ public double likelihood(double w, double y, double[] f) { if (w == 0 || !_algoName.equals("glm")) { return 0; } else { - // create GLM parameters instance - GLMModel.GLMParameters glmParameters = new GLMModel.GLMParameters( - GLMModel.GLMParameters.Family.valueOf(getParamByName("family").toString()), - GLMModel.GLMParameters.Link.valueOf(getParamByName("link").toString()), - Arrays.stream(getParamByName("lambda").toString().trim().replaceAll("\\[", "") - .replaceAll("\\]", "").split(",\\s*")) - .mapToDouble(Double::parseDouble).toArray(), - Arrays.stream(getParamByName("alpha").toString().trim().replaceAll("\\[", "") - .replaceAll("\\]", "").split(",\\s*")) - .mapToDouble(Double::parseDouble).toArray(), - Double.parseDouble(getParamByName("tweedie_variance_power").toString()), - Double.parseDouble(getParamByName("tweedie_link_power").toString()), - null, - Double.parseDouble(getParamByName("theta").toString()) - ); // time-consuming calculation for the final scoring for GLM model - return glmParameters.likelihood(w, y, f); + return _glmParameters.likelihood(w, y, f); } } From 25ba898b1db640adcff906553fc218c3900e6b24 Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Fri, 26 Jan 2024 17:06:45 +0100 Subject: [PATCH 12/37] GH-15809: add new parameter to the constructor, and add new constructor --- h2o-algos/src/main/java/hex/glm/GLMModel.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/h2o-algos/src/main/java/hex/glm/GLMModel.java b/h2o-algos/src/main/java/hex/glm/GLMModel.java index 361759e91008..f4b71c1a1809 100755 --- a/h2o-algos/src/main/java/hex/glm/GLMModel.java +++ b/h2o-algos/src/main/java/hex/glm/GLMModel.java @@ -727,6 +727,11 @@ public GLMParameters(Family f, Link l, double [] lambda, double [] alpha, double public GLMParameters(Family f, Link l, double [] lambda, double [] alpha, double twVar, double twLnk, String[] interactions, double theta){ + this(f,l,lambda,alpha,twVar,twLnk,interactions, theta, Double.NaN); + } + + public GLMParameters(Family f, Link l, double [] lambda, double [] alpha, double twVar, double twLnk, + String[] interactions, double theta, double dispersion_estimated){ this._lambda = lambda; this._alpha = alpha; this._tweedie_variance_power = twVar; @@ -736,7 +741,7 @@ public GLMParameters(Family f, Link l, double [] lambda, double [] alpha, double _link = l; this._theta=theta; this._invTheta = 1.0/theta; - this._dispersion_estimated = _init_dispersion_parameter; + this._dispersion_estimated = Double.isNaN(dispersion_estimated) ? _init_dispersion_parameter : dispersion_estimated; } public final double variance(double mu){ From db5463a538940ad1cd85cead3638736d37ee2822 Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Fri, 26 Jan 2024 17:08:09 +0100 Subject: [PATCH 13/37] GH-15809: add dispersion_estimated parameter to GLM mojo --- h2o-algos/src/main/java/hex/glm/GLMMojoWriter.java | 2 ++ .../main/java/hex/genmodel/algos/glm/GlmMojoModelBase.java | 6 ++++++ .../src/main/java/hex/genmodel/algos/glm/GlmMojoReader.java | 1 + 3 files changed, 9 insertions(+) diff --git a/h2o-algos/src/main/java/hex/glm/GLMMojoWriter.java b/h2o-algos/src/main/java/hex/glm/GLMMojoWriter.java index 788e4949a96e..2c519cba3f12 100644 --- a/h2o-algos/src/main/java/hex/glm/GLMMojoWriter.java +++ b/h2o-algos/src/main/java/hex/glm/GLMMojoWriter.java @@ -39,6 +39,8 @@ protected void writeModelData() throws IOException { if (GLMModel.GLMParameters.Family.tweedie.equals(model._parms._family)) writekv("tweedie_link_power", model._parms._tweedie_link_power); + + writekv("dispersion_estimated", model._parms._compute_p_values ? model._parms._dispersion_estimated : 0); } } diff --git a/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoModelBase.java b/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoModelBase.java index 69cd843e5890..b557e69898dc 100644 --- a/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoModelBase.java +++ b/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoModelBase.java @@ -18,6 +18,8 @@ public abstract class GlmMojoModelBase extends MojoModel { String _family; boolean _versionSupportOffset; + + double _dispersion_estimated; GlmMojoModelBase(String[] columns, String[][] domains, String responseColumn) { super(columns, domains, responseColumn); @@ -26,6 +28,10 @@ public abstract class GlmMojoModelBase extends MojoModel { public double[] getBeta() { return _beta; } + + public double getDispersionEstimated() { + return _dispersion_estimated; + } void init() { _versionSupportOffset = _mojo_version >= 1.1; diff --git a/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoReader.java b/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoReader.java index f2f27748eacc..55df15f2cf58 100644 --- a/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoReader.java +++ b/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoReader.java @@ -29,6 +29,7 @@ protected void readModelData() throws IOException { _model._beta = readkv("beta"); _model._family = readkv("family"); + _model._dispersion_estimated = readkv("dispersion_estimated"); if (_model instanceof GlmMojoModel) { GlmMojoModel m = (GlmMojoModel) _model; From a7269e462aad7f47365c7d37261927a3368c0eff Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Fri, 26 Jan 2024 17:08:58 +0100 Subject: [PATCH 14/37] GH-15809: update and fix tests --- .../pyunit_generic_model_mojo_glm.py | 68 ++++++++++--------- 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py b/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py index f571ee9bf45b..348639821934 100644 --- a/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py +++ b/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py @@ -16,7 +16,9 @@ def test(x, y, output_test, strip_part, algo_name, generic_algo_name, family): # GLM airlines = h2o.import_file(path=pyunit_utils.locate("smalldata/testng/airlines_train.csv")) - glm = H2OGeneralizedLinearEstimator(nfolds = 2, family = family, max_iterations=2) # alpha = 1, lambda_ = 1, bad values, use default + glm = H2OGeneralizedLinearEstimator(nfolds=2, family=family, max_iterations=2, + compute_p_values=family == "gaussian", + remove_collinear_columns=family == "gaussian") # alpha = 1, lambda_ = 1, bad values, use default glm.train(x = x, y = y, training_frame=airlines, validation_frame=airlines, ) with H2OTableDisplay.pandas_rendering_enabled(False), capture_output() as (original_output, _): glm.show() @@ -48,20 +50,22 @@ def test(x, y, output_test, strip_part, algo_name, generic_algo_name, family): generic_mojo_filename = generic_mojo_model_from_file.download_mojo(path=generic_mojo_filename) assert os.path.getsize(generic_mojo_filename) == os.path.getsize(original_model_filename) - glm_calc_like = H2OGeneralizedLinearEstimator(nfolds=2, family=family, max_iterations=2, calc_like=True) - glm_calc_like.train(x=x, y=y, training_frame=airlines, validation_frame=airlines) - - print("glm training metrics:") - print(glm._model_json["output"]["training_metrics"]) - print("glm calc like training metrics:") - print(glm_calc_like._model_json["output"]["training_metrics"]) - print("metrics:") - print(metrics) - - assert math.isclose(glm_calc_like._model_json["output"]["training_metrics"]._metric_json["AIC"], - metrics._metric_json["AIC"], rel_tol=1e-3), "The numbers are not close enough." - assert math.isclose(-glm_calc_like._model_json["output"]["training_metrics"]._metric_json["loglikelihood"], - metrics._metric_json["loglikelihood"], rel_tol=1e-3), "The numbers are not close enough." + if family != 'ordinal': # loglikelihood calculation not available for ordinal family yet + glm_calc_like = H2OGeneralizedLinearEstimator(nfolds=2, family=family, max_iterations=2, calc_like=True, + compute_p_values=True, remove_collinear_columns=True) + glm_calc_like.train(x=x, y=y, training_frame=airlines, validation_frame=airlines) + + print("glm training metrics:") + print(glm._model_json["output"]["training_metrics"]) + print("glm calc like training metrics:") + print(glm_calc_like._model_json["output"]["training_metrics"]) + print("metrics:") + print(metrics) + + assert math.isclose(glm_calc_like._model_json["output"]["training_metrics"]._metric_json["AIC"], + metrics._metric_json["AIC"], rel_tol=1e-3), "The numbers are not close enough." + assert math.isclose(-glm_calc_like._model_json["output"]["training_metrics"]._metric_json["loglikelihood"], + metrics._metric_json["loglikelihood"], rel_tol=1e-3), "The numbers are not close enough." def mojo_model_test_binomial(): @@ -69,25 +73,25 @@ def mojo_model_test_binomial(): 'ModelMetricsBinomialGLMGeneric: generic', 'binomial') -# def mojo_model_test_regression(): -# test(["Origin", "Dest"], "Distance", compare_output, 'GLM Model: summary', 'ModelMetricsRegressionGLM: glm', -# 'ModelMetricsRegressionGLMGeneric: generic', 'gaussian') -# -# -# def mojo_model_test_multinomial(): -# test(["Origin", "Distance"], "Dest", compare_output, 'GLM Model: summary', 'ModelMetricsMultinomialGLM: glm', -# 'ModelMetricsMultinomialGLMGeneric: generic', 'multinomial') -# -# -# def mojo_model_test_ordinal(): -# test(["Origin", "Distance", "IsDepDelayed"], "fDayOfWeek", compare_output, 'GLM Model: summary', -# 'ModelMetricsOrdinalGLM: glm', -# 'ModelMetricsOrdinalGLMGeneric: generic', 'ordinal') +def mojo_model_test_regression(): + test(["Origin", "Dest"], "Distance", compare_output, 'GLM Model: summary', 'ModelMetricsRegressionGLM: glm', + 'ModelMetricsRegressionGLMGeneric: generic', 'gaussian') + + +def mojo_model_test_multinomial(): + test(["Origin", "Distance"], "Dest", compare_output, 'GLM Model: summary', 'ModelMetricsMultinomialGLM: glm', + 'ModelMetricsMultinomialGLMGeneric: generic', 'multinomial') + + +def mojo_model_test_ordinal(): + test(["Origin", "Distance", "IsDepDelayed"], "fDayOfWeek", compare_output, 'GLM Model: summary', + 'ModelMetricsOrdinalGLM: glm', + 'ModelMetricsOrdinalGLMGeneric: generic', 'ordinal') pyunit_utils.run_tests([ mojo_model_test_binomial, - # mojo_model_test_multinomial, - # mojo_model_test_regression, - # mojo_model_test_ordinal + mojo_model_test_multinomial, + mojo_model_test_regression, + mojo_model_test_ordinal ]) From da44bfef00dfa665a81b5055bf95e36047449273 Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Fri, 26 Jan 2024 17:58:31 +0100 Subject: [PATCH 15/37] GH-15809: fix metrics exposure in python --- h2o-py/h2o/model/metrics_base.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/h2o-py/h2o/model/metrics_base.py b/h2o-py/h2o/model/metrics_base.py index 203ba4099be0..26a92815d304 100644 --- a/h2o-py/h2o/model/metrics_base.py +++ b/h2o-py/h2o/model/metrics_base.py @@ -151,10 +151,11 @@ def _str_items(self, verbosity=None): "Null deviance: {}".format(self.null_deviance()), "Residual deviance: {}".format(self.residual_deviance()), ]) - if is_type(self.aic(), numeric) and self.aic() != 0: - items.append("AIC: {}".format(self.aic())) - if is_type(self.loglikelihood(), numeric) and self.loglikelihood() != 0: - items.append("Loglikelihood: {}".format(self.loglikelihood())) + if is_type(self.aic(), numeric) and self.aic() != 0: + items.append("AIC: {}".format(self.aic())) + if is_type(self.loglikelihood(), numeric) and self.loglikelihood() != 0: + items.append("Loglikelihood: {}".format(self.loglikelihood())) + items.extend(self._str_items_custom()) return items From 1dac03ed71cd2ce107c1c1a9f5f57adb56252de9 Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Fri, 26 Jan 2024 17:58:51 +0100 Subject: [PATCH 16/37] GH-15809: fix parameters --- .../pyunit_generic_model_mojo_glm.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py b/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py index 348639821934..e105c79f0c3b 100644 --- a/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py +++ b/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py @@ -17,9 +17,9 @@ def test(x, y, output_test, strip_part, algo_name, generic_algo_name, family): # GLM airlines = h2o.import_file(path=pyunit_utils.locate("smalldata/testng/airlines_train.csv")) glm = H2OGeneralizedLinearEstimator(nfolds=2, family=family, max_iterations=2, - compute_p_values=family == "gaussian", - remove_collinear_columns=family == "gaussian") # alpha = 1, lambda_ = 1, bad values, use default - glm.train(x = x, y = y, training_frame=airlines, validation_frame=airlines, ) + compute_p_values=(family == "gaussian"), + remove_collinear_columns=(family == "gaussian")) # alpha = 1, lambda_ = 1, bad values, use default + glm.train(x=x, y=y, training_frame=airlines, validation_frame=airlines,) with H2OTableDisplay.pandas_rendering_enabled(False), capture_output() as (original_output, _): glm.show() print(original_output.getvalue()) @@ -51,8 +51,9 @@ def test(x, y, output_test, strip_part, algo_name, generic_algo_name, family): assert os.path.getsize(generic_mojo_filename) == os.path.getsize(original_model_filename) if family != 'ordinal': # loglikelihood calculation not available for ordinal family yet - glm_calc_like = H2OGeneralizedLinearEstimator(nfolds=2, family=family, max_iterations=2, calc_like=True, - compute_p_values=True, remove_collinear_columns=True) + glm_calc_like = H2OGeneralizedLinearEstimator(nfolds=2, family=family, max_iterations=2, calc_like=True, + compute_p_values=(family == "gaussian"), + remove_collinear_columns=(family == "gaussian")) glm_calc_like.train(x=x, y=y, training_frame=airlines, validation_frame=airlines) print("glm training metrics:") From 8665036e3b0b4887f3368af7bb546dec13f232a0 Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Mon, 29 Jan 2024 16:50:57 +0100 Subject: [PATCH 17/37] GH-15809: add null check --- h2o-core/src/main/java/hex/ModelMetricsBinomial.java | 4 ++-- h2o-core/src/main/java/hex/ModelMetricsMultinomial.java | 4 ++-- h2o-core/src/main/java/hex/ModelMetricsRegression.java | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/h2o-core/src/main/java/hex/ModelMetricsBinomial.java b/h2o-core/src/main/java/hex/ModelMetricsBinomial.java index 206d0fd4e27e..20374d054e22 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsBinomial.java +++ b/h2o-core/src/main/java/hex/ModelMetricsBinomial.java @@ -207,7 +207,7 @@ public static class MetricBuilderBinomial> ex _logloss += w * MathUtils.logloss(err); } - if(m.getClass().toString().contains("Generic")) { + if(m != null && m.getClass().toString().contains("Generic")) { _loglikelihood += m.likelihood(w, yact[0], ds); } @@ -280,7 +280,7 @@ private ModelMetrics makeModelMetrics(Model m, Frame f, GainsLift gl) { sigma = weightedSigma(); mse = _sumsqe / _wcount; logloss = _logloss / _wcount; - if(m.getClass().toString().contains("Generic")) { + if(m != null && m.getClass().toString().contains("Generic")) { loglikelihood = -1 * _loglikelihood ; // get likelihood from negative loglikelihood aic = m.aic(loglikelihood); } diff --git a/h2o-core/src/main/java/hex/ModelMetricsMultinomial.java b/h2o-core/src/main/java/hex/ModelMetricsMultinomial.java index ab6fba09b45a..69b17cf9b8d1 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsMultinomial.java +++ b/h2o-core/src/main/java/hex/ModelMetricsMultinomial.java @@ -315,7 +315,7 @@ public MetricBuilderMultinomial( int nclasses, String[] domain, MultinomialAucTy } - if(m.getClass().toString().contains("Generic")) { + if(m != null && m.getClass().toString().contains("Generic")) { _loglikelihood += m.likelihood(w, yact[0], ds); } return ds; // Flow coding @@ -379,7 +379,7 @@ private void calculateAucsPerRow(double ds[], int iact, double w){ } mse = _sumsqe / _wcount; logloss = _logloss / _wcount; - if(m.getClass().toString().contains("Generic")) { + if(m != null && m.getClass().toString().contains("Generic")) { loglikelihood = -1 * _loglikelihood ; // get likelihood from negative loglikelihood aic = m.aic(loglikelihood); } diff --git a/h2o-core/src/main/java/hex/ModelMetricsRegression.java b/h2o-core/src/main/java/hex/ModelMetricsRegression.java index ce7b2956c136..1ccb0d509322 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsRegression.java +++ b/h2o-core/src/main/java/hex/ModelMetricsRegression.java @@ -158,7 +158,7 @@ public MetricBuilderRegression(Distribution dist) { } } - if(m.getClass().toString().contains("Generic")) { + if(m != null && m.getClass().toString().contains("Generic")) { _loglikelihood += m.likelihood(w, yact[0], ds); } @@ -212,7 +212,7 @@ ModelMetricsRegression computeModelMetrics(Model m, Frame f, Frame adaptedFrame, } else { meanResDeviance = Double.NaN; } - if(m.getClass().toString().contains("Generic")) { + if(m != null && m.getClass().toString().contains("Generic")) { loglikelihood = -1 * _loglikelihood ; // get likelihood from negative loglikelihood aic = m.aic(loglikelihood); } From eb0b133ea760cd3806c63cc6402525276866f8ac Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Mon, 29 Jan 2024 16:51:20 +0100 Subject: [PATCH 18/37] GH-15809: fix tests --- .../pyunit_pubdev_6413_cv_sd_fix.py | 2 ++ .../testdir_misc/pyunit_metric_json_check.py | 20 ++++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/h2o-py/tests/testdir_jira/pyunit_pubdev_6413_cv_sd_fix.py b/h2o-py/tests/testdir_jira/pyunit_pubdev_6413_cv_sd_fix.py index 247604c1a235..8912db6cbe91 100644 --- a/h2o-py/tests/testdir_jira/pyunit_pubdev_6413_cv_sd_fix.py +++ b/h2o-py/tests/testdir_jira/pyunit_pubdev_6413_cv_sd_fix.py @@ -50,6 +50,8 @@ def assertMeanSDCalculation(meanCol, stdCol, cvVals, tol=1e-6): xsum += temp xsumSquare += temp*temp xmean = xsum/nfolds + if math.isnan(xmean) and math.isnan(float(meanCol[itemIndex])): + continue assert abs(xmean-float(meanCol[itemIndex])) < tol, "Expected mean: {0}, Actual mean: {1}".format(xmean, float(meanCol[itemIndex])) xstd = math.sqrt((xsumSquare-nfolds*xmean*xmean)*oneOverNm1) assert abs(xstd-float(stdCol[itemIndex])) < tol, "Expected SD: {0}, Actual SD: {1}".format(xstd, float(stdCol[itemIndex])) diff --git a/h2o-py/tests/testdir_misc/pyunit_metric_json_check.py b/h2o-py/tests/testdir_misc/pyunit_metric_json_check.py index c28dc8148e9f..5564a60c7f96 100644 --- a/h2o-py/tests/testdir_misc/pyunit_metric_json_check.py +++ b/h2o-py/tests/testdir_misc/pyunit_metric_json_check.py @@ -36,7 +36,9 @@ def metric_json_check(): u'nobs', u'mean_residual_deviance', u'custom_metric_name', - u'custom_metric_value'] + u'custom_metric_value', + u'loglikelihood', + u'AIC'] reg_metric_diff = list(set(reg_metric_json_keys_have) - set(reg_metric_json_keys_desired)) assert not reg_metric_diff, "There's a difference between the current ({0}) and the desired ({1}) regression " \ "metric json. The difference is {2}".format(reg_metric_json_keys_have, @@ -72,7 +74,9 @@ def metric_json_check(): u'residual_deviance', u'mean_residual_deviance', u'custom_metric_name', - u'custom_metric_value'] + u'custom_metric_value', + u'loglikelihood', + u'AIC'] reg_metric_diff = list(set(reg_metric_json_keys_have) - set(reg_metric_json_keys_desired)) assert not reg_metric_diff, "There's a difference between the current ({0}) and the desired ({1}) glm-regression " \ "metric json. The difference is {2}".format(reg_metric_json_keys_have, @@ -111,7 +115,9 @@ def metric_json_check(): u'domain', u'custom_metric_name', u'custom_metric_value', - u'pr_auc'] + u'pr_auc', + u'loglikelihood', + u'AIC'] bin_metric_diff = list(set(bin_metric_json_keys_have) - set(bin_metric_json_keys_desired)) assert not bin_metric_diff, "There's a difference between the current ({0}) and the desired ({1}) binomial " \ "metric json. The difference is {2}".format(bin_metric_json_keys_have, @@ -154,7 +160,9 @@ def metric_json_check(): u'domain', u'custom_metric_name', u'custom_metric_value', - u'pr_auc'] + u'pr_auc', + u'loglikelihood', + u'AIC'] bin_metric_diff = list(set(bin_metric_json_keys_have) - set(bin_metric_json_keys_desired)) assert not bin_metric_diff, "There's a difference between the current ({0}) and the desired ({1}) glm-binomial " \ "metric json. The difference is {2}".format(bin_metric_json_keys_have, @@ -194,7 +202,9 @@ def metric_json_check(): u'duration_in_ms', u'frame_checksum', u'custom_metric_name', - u'custom_metric_value'] + u'custom_metric_value', + u'loglikelihood', + u'AIC'] mul_metric_diff = list(set(mul_metric_json_keys_have) - set(mul_metric_json_keys_desired)) assert not mul_metric_diff, "There's a difference between the current ({0}) and the desired ({1}) multinomial " \ "metric json. The difference is {2}".format(mul_metric_json_keys_have, From be425a761d59e724f63c4680c117f37012d6c35c Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Wed, 31 Jan 2024 15:49:41 +0100 Subject: [PATCH 19/37] GH-15809: fix R tests --- .../generic/runit_generic_model_mojo_drf.R | 8 ++++---- .../generic/runit_generic_model_mojo_gbm.R | 8 ++++---- .../generic/runit_generic_model_mojo_glm.R | 10 ++++------ .../generic/runit_generic_model_mojo_xgboost.R | 8 ++++---- 4 files changed, 16 insertions(+), 18 deletions(-) diff --git a/h2o-r/tests/testdir_algos/generic/runit_generic_model_mojo_drf.R b/h2o-r/tests/testdir_algos/generic/runit_generic_model_mojo_drf.R index 39c74e602288..6472fac40c5b 100644 --- a/h2o-r/tests/testdir_algos/generic/runit_generic_model_mojo_drf.R +++ b/h2o-r/tests/testdir_algos/generic/runit_generic_model_mojo_drf.R @@ -21,8 +21,8 @@ test.model.generic.drf <- function() { original_output <- capture.output(print(original_model)) generic_output <- capture.output(print(generic_model)) compare_output(original_output, generic_output, - c("Extract .+ frame","H2OBinomialModel: drf", "Model ID", "H2OBinomialMetrics: drf"), - c("H2OBinomialModel: generic", "Model ID", "H2OBinomialMetrics: generic")) + c("Extract .+ frame","H2OBinomialModel: drf", "Model ID", "H2OBinomialMetrics: drf", "AIC"), + c("H2OBinomialModel: generic", "Model ID", "H2OBinomialMetrics: generic", "AIC", "loglikelihood")) generic_model_preds <- h2o.predict(generic_model, data) expect_equal(length(generic_model_preds), 3) @@ -45,8 +45,8 @@ test.model.generic.drf <- function() { original_output <- capture.output(print(original_model)) generic_output <- capture.output(print(generic_model)) compare_output(original_output, generic_output, - c("Extract .+ frame", "H2OMultinomialModel: drf", "Model ID", "H2OMultinomialMetrics: drf"), - c("H2OMultinomialModel: generic", "Model ID", "H2OMultinomialMetrics: generic")) + c("Extract .+ frame", "H2OMultinomialModel: drf", "Model ID", "H2OMultinomialMetrics: drf", "AIC"), + c("H2OMultinomialModel: generic", "Model ID", "H2OMultinomialMetrics: generic", "AIC")) # Regression cols <- c("Origin", "Dest") diff --git a/h2o-r/tests/testdir_algos/generic/runit_generic_model_mojo_gbm.R b/h2o-r/tests/testdir_algos/generic/runit_generic_model_mojo_gbm.R index a625fb15feb7..15155063634c 100644 --- a/h2o-r/tests/testdir_algos/generic/runit_generic_model_mojo_gbm.R +++ b/h2o-r/tests/testdir_algos/generic/runit_generic_model_mojo_gbm.R @@ -16,8 +16,8 @@ test.model.generic.gbm <- function() { original_output <- capture.output(print(original_model)) generic_output <- capture.output(print(generic_model)) compare_output(original_output, generic_output, - c("Extract .+ frame","H2OBinomialModel: gbm", "Model ID", "H2OBinomialMetrics: gbm"), - c("H2OBinomialModel: generic", "Model ID", "H2OBinomialMetrics: generic")) + c("Extract .+ frame","H2OBinomialModel: gbm", "Model ID", "H2OBinomialMetrics: gbm", "AIC"), + c("H2OBinomialModel: generic", "Model ID", "H2OBinomialMetrics: generic", "AIC")) generic_model_preds <- h2o.predict(generic_model, data) expect_equal(length(generic_model_preds), 3) @@ -54,8 +54,8 @@ test.model.generic.gbm <- function() { original_output <- capture.output(print(original_model)) generic_output <- capture.output(print(generic_model)) compare_output(original_output, generic_output, - c("Extract .+ frame", "H2OMultinomialModel: gbm", "Model ID", "H2OMultinomialMetrics: gbm"), - c("H2OMultinomialModel: generic", "Model ID", "H2OMultinomialMetrics: generic")) + c("Extract .+ frame", "H2OMultinomialModel: gbm", "Model ID", "H2OMultinomialMetrics: gbm", "AIC"), + c("H2OMultinomialModel: generic", "Model ID", "H2OMultinomialMetrics: generic", "AIC")) } diff --git a/h2o-r/tests/testdir_algos/generic/runit_generic_model_mojo_glm.R b/h2o-r/tests/testdir_algos/generic/runit_generic_model_mojo_glm.R index 5f3c7f317474..b8441b5971f1 100644 --- a/h2o-r/tests/testdir_algos/generic/runit_generic_model_mojo_glm.R +++ b/h2o-r/tests/testdir_algos/generic/runit_generic_model_mojo_glm.R @@ -46,9 +46,8 @@ test.model.generic.glm <- function() { original_output <- capture.output(print(original_model)) generic_output <- capture.output(print(generic_model)) compare_output(original_output, generic_output, - c("Extract .+ frame","H2OBinomialModel: glm", "Model ID", "H2OBinomialMetrics: glm"), - c("H2OBinomialModel: generic", "Model ID", "H2OBinomialMetrics: generic")) - + c("Extract .+ frame","H2OBinomialModel: glm", "Model ID", "H2OBinomialMetrics: glm", "AIC"), + c("H2OBinomialModel: generic", "Model ID", "H2OBinomialMetrics: generic", "AIC")) # Multinomial cols <- c("Origin", "Distance") @@ -64,9 +63,8 @@ test.model.generic.glm <- function() { original_output <- capture.output(print(original_model)) generic_output <- capture.output(print(generic_model)) compare_output(original_output, generic_output, - c("Extract .+ frame","H2OMultinomialModel: glm", "Model ID", "H2OMultinomialMetrics: glm"), - c("H2OMultinomialModel: generic", "Model ID", "H2OMultinomialMetrics: generic")) - + c("Extract .+ frame","H2OMultinomialModel: glm", "Model ID", "H2OMultinomialMetrics: glm", "AIC"), + c("H2OMultinomialModel: generic", "Model ID", "H2OMultinomialMetrics: generic", "AIC")) # Ordinal cols <- c("Origin", "Distance") diff --git a/h2o-r/tests/testdir_algos/generic/runit_generic_model_mojo_xgboost.R b/h2o-r/tests/testdir_algos/generic/runit_generic_model_mojo_xgboost.R index 761540cd8d92..62571ba7a0c5 100644 --- a/h2o-r/tests/testdir_algos/generic/runit_generic_model_mojo_xgboost.R +++ b/h2o-r/tests/testdir_algos/generic/runit_generic_model_mojo_xgboost.R @@ -16,8 +16,8 @@ test.model.generic.gbm <- function() { original_output <- capture.output(print(original_model)) generic_output <- capture.output(print(generic_model)) compare_output(original_output, generic_output, - c("Extract .+ frame","H2OBinomialModel: xgboost", "Model ID", "H2OBinomialMetrics: xgboost"), - c("H2OBinomialModel: generic", "Model ID", "H2OBinomialMetrics: generic")) + c("Extract .+ frame","H2OBinomialModel: xgboost", "Model ID", "H2OBinomialMetrics: xgboost", "AIC"), + c("H2OBinomialModel: generic", "Model ID", "H2OBinomialMetrics: generic", "AIC")) generic_model_preds <- h2o.predict(generic_model, data) expect_equal(length(generic_model_preds), 3) @@ -54,8 +54,8 @@ test.model.generic.gbm <- function() { original_output <- capture.output(print(original_model)) generic_output <- capture.output(print(generic_model)) compare_output(original_output, generic_output, - c("Extract .+ frame", "H2OMultinomialModel: xgboost", "Model ID", "H2OMultinomialMetrics: xgboost"), - c("H2OMultinomialModel: generic", "Model ID", "H2OMultinomialMetrics: generic")) + c("Extract .+ frame", "H2OMultinomialModel: xgboost", "Model ID", "H2OMultinomialMetrics: xgboost", "AIC"), + c("H2OMultinomialModel: generic", "Model ID", "H2OMultinomialMetrics: generic", "AIC")) } From ac8d8abbb83113ee254c308c5304290c6b071355 Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Thu, 1 Feb 2024 16:54:02 +0100 Subject: [PATCH 20/37] GH-15809: fix reading new parameter in MOJO load --- .../src/main/java/hex/genmodel/algos/glm/GlmMojoReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoReader.java b/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoReader.java index 55df15f2cf58..350d2a9bcecd 100644 --- a/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoReader.java +++ b/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoReader.java @@ -29,7 +29,7 @@ protected void readModelData() throws IOException { _model._beta = readkv("beta"); _model._family = readkv("family"); - _model._dispersion_estimated = readkv("dispersion_estimated"); + _model._dispersion_estimated = readkv("dispersion_estimated", 1); if (_model instanceof GlmMojoModel) { GlmMojoModel m = (GlmMojoModel) _model; From 48939fd59825849c3e6817fd4c95cc9e91132cac Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Thu, 1 Feb 2024 17:17:14 +0100 Subject: [PATCH 21/37] GH-15809: fix writing new parameter in MOJO load --- h2o-algos/src/main/java/hex/glm/GLMMojoWriter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/h2o-algos/src/main/java/hex/glm/GLMMojoWriter.java b/h2o-algos/src/main/java/hex/glm/GLMMojoWriter.java index 2c519cba3f12..f80e5b8a991e 100644 --- a/h2o-algos/src/main/java/hex/glm/GLMMojoWriter.java +++ b/h2o-algos/src/main/java/hex/glm/GLMMojoWriter.java @@ -40,7 +40,7 @@ protected void writeModelData() throws IOException { if (GLMModel.GLMParameters.Family.tweedie.equals(model._parms._family)) writekv("tweedie_link_power", model._parms._tweedie_link_power); - writekv("dispersion_estimated", model._parms._compute_p_values ? model._parms._dispersion_estimated : 0); + writekv("dispersion_estimated", (model._parms._compute_p_values ? model._parms._dispersion_estimated : 1.0)); } } From 76e62f612095499e4fa0e558c33f2bf745ad3c93 Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Thu, 1 Feb 2024 17:46:00 +0100 Subject: [PATCH 22/37] GH-15809: fix value --- .../src/main/java/hex/genmodel/algos/glm/GlmMojoReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoReader.java b/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoReader.java index 350d2a9bcecd..1d81b0ba8c32 100644 --- a/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoReader.java +++ b/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoReader.java @@ -29,7 +29,7 @@ protected void readModelData() throws IOException { _model._beta = readkv("beta"); _model._family = readkv("family"); - _model._dispersion_estimated = readkv("dispersion_estimated", 1); + _model._dispersion_estimated = readkv("dispersion_estimated", 0.0); if (_model instanceof GlmMojoModel) { GlmMojoModel m = (GlmMojoModel) _model; From b2318c8fdb572aa13e07e5202c86fee58b924695 Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Fri, 2 Feb 2024 15:50:36 +0100 Subject: [PATCH 23/37] GH-15809: fix comments --- .../main/java/water/api/schemas3/ModelMetricsBinomialV3.java | 2 +- .../main/java/water/api/schemas3/ModelMetricsMultinomialV3.java | 2 +- .../water/api/schemas3/ModelMetricsRegressionGenericV3.java | 2 +- .../main/java/water/api/schemas3/ModelMetricsRegressionV3.java | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/h2o-core/src/main/java/water/api/schemas3/ModelMetricsBinomialV3.java b/h2o-core/src/main/java/water/api/schemas3/ModelMetricsBinomialV3.java index 13e925d44f3a..0f6deb6fad6a 100644 --- a/h2o-core/src/main/java/water/api/schemas3/ModelMetricsBinomialV3.java +++ b/h2o-core/src/main/java/water/api/schemas3/ModelMetricsBinomialV3.java @@ -18,7 +18,7 @@ public class ModelMetricsBinomialV3 Date: Fri, 2 Feb 2024 15:51:02 +0100 Subject: [PATCH 24/37] GH-15809: fix printing metrics --- h2o-py/h2o/model/metrics_base.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/h2o-py/h2o/model/metrics_base.py b/h2o-py/h2o/model/metrics_base.py index 26a92815d304..72accb8e88ec 100644 --- a/h2o-py/h2o/model/metrics_base.py +++ b/h2o-py/h2o/model/metrics_base.py @@ -4,6 +4,7 @@ :copyright: (c) 2016 H2O.ai :license: Apache License Version 2.0 (see LICENSE for details) """ +import math from collections import OrderedDict from h2o.display import H2ODisplay, display, repr_def, format_to_html, format_to_multiline @@ -151,11 +152,12 @@ def _str_items(self, verbosity=None): "Null deviance: {}".format(self.null_deviance()), "Residual deviance: {}".format(self.residual_deviance()), ]) - if is_type(self.aic(), numeric) and self.aic() != 0: - items.append("AIC: {}".format(self.aic())) - if is_type(self.loglikelihood(), numeric) and self.loglikelihood() != 0: - items.append("Loglikelihood: {}".format(self.loglikelihood())) - + + if is_type(self.aic(), numeric) and not math.isnan(self.aic()) and self.aic() != 0: + items.append("AIC: {}".format(self.aic())) + if is_type(self.loglikelihood(), numeric) and not math.isnan(self.aic()) and self.loglikelihood() != 0: + items.append("Loglikelihood: {}".format(self.loglikelihood())) + items.extend(self._str_items_custom()) return items From 211ba61a49ba4180805e5a2d36d857912650748d Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Fri, 2 Feb 2024 18:40:38 +0100 Subject: [PATCH 25/37] GH-15809: remove commented code --- h2o-algos/src/main/java/hex/generic/GenericModel.java | 1 - 1 file changed, 1 deletion(-) diff --git a/h2o-algos/src/main/java/hex/generic/GenericModel.java b/h2o-algos/src/main/java/hex/generic/GenericModel.java index feaaf4836422..9b395c562076 100644 --- a/h2o-algos/src/main/java/hex/generic/GenericModel.java +++ b/h2o-algos/src/main/java/hex/generic/GenericModel.java @@ -154,7 +154,6 @@ protected PredictScoreResult predictScoreImpl(Frame fr, Frame adaptFrm, String d return predictScoreMojoImpl(fr, destination_key, j, computeMetrics); } else return super.predictScoreImpl(fr, adaptFrm, destination_key, j, computeMetrics, customMetricFunc); -// return super.predictScoreImpl(fr, adaptFrm, destination_key, j, true, customMetricFunc); } private Iced getParamByName(String name) { From c08e2f579ea480e7fc5feeac0a08afa2fdb65860 Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Fri, 2 Feb 2024 18:42:11 +0100 Subject: [PATCH 26/37] GH-15809: assign NaN instead of 0 as placeholder value for Loglikelihood --- h2o-algos/src/main/java/hex/generic/GenericModel.java | 8 +++++--- h2o-core/src/main/java/hex/Model.java | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/h2o-algos/src/main/java/hex/generic/GenericModel.java b/h2o-algos/src/main/java/hex/generic/GenericModel.java index 9b395c562076..e8220a7e75c6 100644 --- a/h2o-algos/src/main/java/hex/generic/GenericModel.java +++ b/h2o-algos/src/main/java/hex/generic/GenericModel.java @@ -165,7 +165,7 @@ private Iced getParamByName(String name) { public double aic(double likelihood) { // calculate negative loglikelihood specifically for GLM if (!_algoName.equals("glm")) { - return 0; + return Double.NaN; } else { long betasCount = Arrays.stream(((GlmMojoModelBase) this.genModel()).getBeta()).filter(b -> b != 0).count(); return -2 * likelihood + 2 * betasCount; @@ -174,8 +174,10 @@ public double aic(double likelihood) { @Override public double likelihood(double w, double y, double[] f) { - // calculate negative loglikelihood specifically for GLM - if (w == 0 || !_algoName.equals("glm")) { + // calculate negative loglikelihood specifically for GLM + if(!_algoName.equals("glm")) { + return Double.NaN; + } else if (w == 0) { return 0; } else { // time-consuming calculation for the final scoring for GLM model diff --git a/h2o-core/src/main/java/hex/Model.java b/h2o-core/src/main/java/hex/Model.java index d2d2642ec015..33db02a7ebce 100755 --- a/h2o-core/src/main/java/hex/Model.java +++ b/h2o-core/src/main/java/hex/Model.java @@ -1381,11 +1381,11 @@ public double deviance(double w, double y, double f) { } public double likelihood(double w, double y, double[] f) { - return 0.0; // place holder. This function is overridden in GLM. + return Double.NaN; // placeholder. This function is overridden in GLM and GenericModel. } public double aic(double likelihood) { - return 0.0; // place holder. This function is overridden in GLM. + return Double.NaN; // placeholder. This function is overridden in GenericModel. } public ScoringInfo[] scoring_history() { return scoringInfo; } From efbbd6a9a83f8c6aad1b1447b1526f4dc7dd4220 Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Fri, 2 Feb 2024 18:42:30 +0100 Subject: [PATCH 27/37] GH-15809: default dispersion estimation set to 1 --- .../src/main/java/hex/genmodel/algos/glm/GlmMojoReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoReader.java b/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoReader.java index 1d81b0ba8c32..5dca5620ad85 100644 --- a/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoReader.java +++ b/h2o-genmodel/src/main/java/hex/genmodel/algos/glm/GlmMojoReader.java @@ -29,7 +29,7 @@ protected void readModelData() throws IOException { _model._beta = readkv("beta"); _model._family = readkv("family"); - _model._dispersion_estimated = readkv("dispersion_estimated", 0.0); + _model._dispersion_estimated = readkv("dispersion_estimated", 1.0); if (_model instanceof GlmMojoModel) { GlmMojoModel m = (GlmMojoModel) _model; From 607f0fa7336d6077441ad4d8084800da8c675690 Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Fri, 2 Feb 2024 18:43:33 +0100 Subject: [PATCH 28/37] GH-15809: clean test --- .../testdir_generic_model/pyunit_generic_model_mojo_glm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py b/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py index e105c79f0c3b..da35215e151e 100644 --- a/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py +++ b/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py @@ -37,7 +37,7 @@ def test(x, y, output_test, strip_part, algo_name, generic_algo_name, family): airlines_metrics_dataset = h2o.import_file(path=pyunit_utils.locate("smalldata/testng/airlines_train.csv")) predictions = generic_mojo_model_from_file.predict(airlines_metrics_dataset) - metrics = generic_mojo_model_from_file.model_performance(airlines_metrics_dataset) # just loglikelihood (multiplied -1) + metrics = generic_mojo_model_from_file.model_performance(airlines_metrics_dataset) assert predictions is not None assert predictions.nrows == 24421 assert generic_mojo_model_from_file._model_json["output"]["model_summary"] is not None @@ -54,7 +54,7 @@ def test(x, y, output_test, strip_part, algo_name, generic_algo_name, family): glm_calc_like = H2OGeneralizedLinearEstimator(nfolds=2, family=family, max_iterations=2, calc_like=True, compute_p_values=(family == "gaussian"), remove_collinear_columns=(family == "gaussian")) - glm_calc_like.train(x=x, y=y, training_frame=airlines, validation_frame=airlines) + glm_calc_like.train(x=x, y=y, training_frame=airlines_metrics_dataset, validation_frame=airlines_metrics_dataset) print("glm training metrics:") print(glm._model_json["output"]["training_metrics"]) From 6c46408d653b59a4cb9aadb7121c4141be3c26ee Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Fri, 2 Feb 2024 18:54:37 +0100 Subject: [PATCH 29/37] GH-15809: fix aic check in test --- h2o-py/h2o/model/metrics_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/h2o-py/h2o/model/metrics_base.py b/h2o-py/h2o/model/metrics_base.py index 72accb8e88ec..1ff5be3408d6 100644 --- a/h2o-py/h2o/model/metrics_base.py +++ b/h2o-py/h2o/model/metrics_base.py @@ -155,7 +155,7 @@ def _str_items(self, verbosity=None): if is_type(self.aic(), numeric) and not math.isnan(self.aic()) and self.aic() != 0: items.append("AIC: {}".format(self.aic())) - if is_type(self.loglikelihood(), numeric) and not math.isnan(self.aic()) and self.loglikelihood() != 0: + if is_type(self.loglikelihood(), numeric) and not math.isnan(self.loglikelihood()) and self.loglikelihood() != 0: items.append("Loglikelihood: {}".format(self.loglikelihood())) items.extend(self._str_items_custom()) From 22c383a770b00907d568b29aa1c3ed9605f32805 Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Mon, 5 Feb 2024 14:38:21 +0100 Subject: [PATCH 30/37] GH-15809: additionally fix aic check in test --- h2o-py/h2o/model/metrics_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/h2o-py/h2o/model/metrics_base.py b/h2o-py/h2o/model/metrics_base.py index 1ff5be3408d6..48fdaa2b54de 100644 --- a/h2o-py/h2o/model/metrics_base.py +++ b/h2o-py/h2o/model/metrics_base.py @@ -153,9 +153,9 @@ def _str_items(self, verbosity=None): "Residual deviance: {}".format(self.residual_deviance()), ]) - if is_type(self.aic(), numeric) and not math.isnan(self.aic()) and self.aic() != 0: + if (m_is_binomial or m_is_regression or m_is_multinomial) and is_type(self.aic(), numeric) and not math.isnan(self.aic()) and self.aic() != 0: items.append("AIC: {}".format(self.aic())) - if is_type(self.loglikelihood(), numeric) and not math.isnan(self.loglikelihood()) and self.loglikelihood() != 0: + if (m_is_binomial or m_is_regression or m_is_multinomial) and is_type(self.loglikelihood(), numeric) and not math.isnan(self.loglikelihood()) and self.loglikelihood() != 0: items.append("Loglikelihood: {}".format(self.loglikelihood())) items.extend(self._str_items_custom()) From a6c8bdafdac7e4db65fc11e6267a0074f79eb12c Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Mon, 5 Feb 2024 14:39:38 +0100 Subject: [PATCH 31/37] GH-15809: additionally fix aic check in test --- h2o-py/h2o/model/metrics_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/h2o-py/h2o/model/metrics_base.py b/h2o-py/h2o/model/metrics_base.py index 48fdaa2b54de..6e0f01652c32 100644 --- a/h2o-py/h2o/model/metrics_base.py +++ b/h2o-py/h2o/model/metrics_base.py @@ -153,9 +153,9 @@ def _str_items(self, verbosity=None): "Residual deviance: {}".format(self.residual_deviance()), ]) - if (m_is_binomial or m_is_regression or m_is_multinomial) and is_type(self.aic(), numeric) and not math.isnan(self.aic()) and self.aic() != 0: + if (m_is_binomial or m_is_regression or m_is_multinomial or m_is_glm) and is_type(self.aic(), numeric) and not math.isnan(self.aic()) and self.aic() != 0: items.append("AIC: {}".format(self.aic())) - if (m_is_binomial or m_is_regression or m_is_multinomial) and is_type(self.loglikelihood(), numeric) and not math.isnan(self.loglikelihood()) and self.loglikelihood() != 0: + if (m_is_binomial or m_is_regression or m_is_multinomial or m_is_glm) and is_type(self.loglikelihood(), numeric) and not math.isnan(self.loglikelihood()) and self.loglikelihood() != 0: items.append("Loglikelihood: {}".format(self.loglikelihood())) items.extend(self._str_items_custom()) From 4a90a0bc72966e53dcc05419ee67da4cddbf1f0c Mon Sep 17 00:00:00 2001 From: syzonyuliia Date: Mon, 5 Feb 2024 18:41:37 +0100 Subject: [PATCH 32/37] GH-15809: fit test - add default parameters --- .../src/test/java/ai/h2o/automl/ModelingStepsExecutorTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/h2o-automl/src/test/java/ai/h2o/automl/ModelingStepsExecutorTest.java b/h2o-automl/src/test/java/ai/h2o/automl/ModelingStepsExecutorTest.java index e8a1623f7fa3..cf6292ce63f6 100644 --- a/h2o-automl/src/test/java/ai/h2o/automl/ModelingStepsExecutorTest.java +++ b/h2o-automl/src/test/java/ai/h2o/automl/ModelingStepsExecutorTest.java @@ -361,7 +361,7 @@ protected void doSth() { GBMModel.GBMOutput output = new GBMModel.GBMOutput(new GBM(parms)); Model res = new GBMModel(_job._result, parms, output); Frame fr = aml().getTrainingFrame(); - output._training_metrics = new ModelMetricsRegression(res, fr, 1, 1, 1, 1, 1, 1, null); + output._training_metrics = new ModelMetricsRegression(res, fr, 1, 1, 1, 1, 1, 1, null, 0, 0); DKV.put(_job._result, res); } } From 9a5bf25329b35ab4ff791026ad8cb43514f41e98 Mon Sep 17 00:00:00 2001 From: wendycwong Date: Mon, 5 Feb 2024 14:49:08 -0800 Subject: [PATCH 33/37] Fixed test discrepancies. --- .../pyunit_generic_model_mojo_glm.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py b/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py index da35215e151e..b74cd8ec1aaf 100644 --- a/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py +++ b/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py @@ -18,7 +18,7 @@ def test(x, y, output_test, strip_part, algo_name, generic_algo_name, family): airlines = h2o.import_file(path=pyunit_utils.locate("smalldata/testng/airlines_train.csv")) glm = H2OGeneralizedLinearEstimator(nfolds=2, family=family, max_iterations=2, compute_p_values=(family == "gaussian"), - remove_collinear_columns=(family == "gaussian")) # alpha = 1, lambda_ = 1, bad values, use default + remove_collinear_columns=(family == "gaussian"), seed=12345) # alpha = 1, lambda_ = 1, bad values, use default glm.train(x=x, y=y, training_frame=airlines, validation_frame=airlines,) with H2OTableDisplay.pandas_rendering_enabled(False), capture_output() as (original_output, _): glm.show() @@ -53,7 +53,7 @@ def test(x, y, output_test, strip_part, algo_name, generic_algo_name, family): if family != 'ordinal': # loglikelihood calculation not available for ordinal family yet glm_calc_like = H2OGeneralizedLinearEstimator(nfolds=2, family=family, max_iterations=2, calc_like=True, compute_p_values=(family == "gaussian"), - remove_collinear_columns=(family == "gaussian")) + remove_collinear_columns=(family == "gaussian"), seed=12345) glm_calc_like.train(x=x, y=y, training_frame=airlines_metrics_dataset, validation_frame=airlines_metrics_dataset) print("glm training metrics:") @@ -64,9 +64,9 @@ def test(x, y, output_test, strip_part, algo_name, generic_algo_name, family): print(metrics) assert math.isclose(glm_calc_like._model_json["output"]["training_metrics"]._metric_json["AIC"], - metrics._metric_json["AIC"], rel_tol=1e-3), "The numbers are not close enough." + metrics._metric_json["AIC"], rel_tol=1e-6), "The numbers are not close enough." assert math.isclose(-glm_calc_like._model_json["output"]["training_metrics"]._metric_json["loglikelihood"], - metrics._metric_json["loglikelihood"], rel_tol=1e-3), "The numbers are not close enough." + metrics._metric_json["loglikelihood"], rel_tol=1e-6), "The numbers are not close enough." def mojo_model_test_binomial(): From 4a88933791a19badecabd792adea5f2aade4b75c Mon Sep 17 00:00:00 2001 From: wendycwong Date: Tue, 6 Feb 2024 09:15:06 -0800 Subject: [PATCH 34/37] only return AIC and loglikelihood for glm models --- h2o-py/h2o/model/metrics_base.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/h2o-py/h2o/model/metrics_base.py b/h2o-py/h2o/model/metrics_base.py index 6e0f01652c32..d33d030143e3 100644 --- a/h2o-py/h2o/model/metrics_base.py +++ b/h2o-py/h2o/model/metrics_base.py @@ -325,7 +325,8 @@ def aic(self): ... validation_frame = valid) >>> pros_glm.aic() """ - return self._metric_json['AIC'] + if self._algo == 'glm': + return self._metric_json['AIC'] def loglikelihood(self): """The log likelihood for this set of metrics. @@ -348,7 +349,8 @@ def loglikelihood(self): ... validation_frame = valid) >>> pros_glm.loglikelihood() """ - return self._metric_json['loglikelihood'] + if self._algo == 'glm': + return self._metric_json['loglikelihood'] def gini(self): """Gini coefficient. From a6b962c9e41f853c4684a4123219fdc0cc5dce3a Mon Sep 17 00:00:00 2001 From: wendycwong Date: Sat, 10 Feb 2024 12:15:16 -0800 Subject: [PATCH 35/37] fixed AIC problem when model is not glm --- h2o-py/h2o/model/metrics_base.py | 15 +++++++-------- .../pyunit_generic_model_mojo_glm.py | 6 +++++- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/h2o-py/h2o/model/metrics_base.py b/h2o-py/h2o/model/metrics_base.py index d33d030143e3..eccf94c105ff 100644 --- a/h2o-py/h2o/model/metrics_base.py +++ b/h2o-py/h2o/model/metrics_base.py @@ -153,10 +153,11 @@ def _str_items(self, verbosity=None): "Residual deviance: {}".format(self.residual_deviance()), ]) - if (m_is_binomial or m_is_regression or m_is_multinomial or m_is_glm) and is_type(self.aic(), numeric) and not math.isnan(self.aic()) and self.aic() != 0: - items.append("AIC: {}".format(self.aic())) - if (m_is_binomial or m_is_regression or m_is_multinomial or m_is_glm) and is_type(self.loglikelihood(), numeric) and not math.isnan(self.loglikelihood()) and self.loglikelihood() != 0: - items.append("Loglikelihood: {}".format(self.loglikelihood())) + if m_is_glm: + if is_type(self.aic(), numeric) and not math.isnan(self.aic()) and self.aic() != 0: + items.append("AIC: {}".format(self.aic())) + if is_type(self.loglikelihood(), numeric) and not math.isnan(self.loglikelihood()) and self.loglikelihood() != 0: + items.append("Loglikelihood: {}".format(self.loglikelihood())) items.extend(self._str_items_custom()) return items @@ -325,8 +326,7 @@ def aic(self): ... validation_frame = valid) >>> pros_glm.aic() """ - if self._algo == 'glm': - return self._metric_json['AIC'] + return self._metric_json['AIC'] def loglikelihood(self): """The log likelihood for this set of metrics. @@ -349,8 +349,7 @@ def loglikelihood(self): ... validation_frame = valid) >>> pros_glm.loglikelihood() """ - if self._algo == 'glm': - return self._metric_json['loglikelihood'] + return self._metric_json['loglikelihood'] def gini(self): """Gini coefficient. diff --git a/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py b/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py index b74cd8ec1aaf..f67ed2737b16 100644 --- a/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py +++ b/h2o-py/tests/testdir_generic_model/pyunit_generic_model_mojo_glm.py @@ -22,6 +22,7 @@ def test(x, y, output_test, strip_part, algo_name, generic_algo_name, family): glm.train(x=x, y=y, training_frame=airlines, validation_frame=airlines,) with H2OTableDisplay.pandas_rendering_enabled(False), capture_output() as (original_output, _): glm.show() + print("*************** GLM model metrics") print(original_output.getvalue()) original_model_filename = tempfile.mkdtemp() original_model_filename = glm.download_mojo(original_model_filename) @@ -30,6 +31,7 @@ def test(x, y, output_test, strip_part, algo_name, generic_algo_name, family): assert generic_mojo_model_from_file is not None with H2OTableDisplay.pandas_rendering_enabled(False), capture_output() as (generic_output, _): generic_mojo_model_from_file.show() + print("*************** GLM generic model metrics") print(generic_output.getvalue()) compare_params(glm, generic_mojo_model_from_file) @@ -72,16 +74,18 @@ def test(x, y, output_test, strip_part, algo_name, generic_algo_name, family): def mojo_model_test_binomial(): test(["Origin", "Dest"], "IsDepDelayed", compare_output, 'GLM Model: summary', 'ModelMetricsBinomialGLM: glm', 'ModelMetricsBinomialGLMGeneric: generic', 'binomial') + print("completed binomial tests.") def mojo_model_test_regression(): test(["Origin", "Dest"], "Distance", compare_output, 'GLM Model: summary', 'ModelMetricsRegressionGLM: glm', 'ModelMetricsRegressionGLMGeneric: generic', 'gaussian') - + print("completed Gaussian tests.") def mojo_model_test_multinomial(): test(["Origin", "Distance"], "Dest", compare_output, 'GLM Model: summary', 'ModelMetricsMultinomialGLM: glm', 'ModelMetricsMultinomialGLMGeneric: generic', 'multinomial') + print("completed Multinomial tests.") def mojo_model_test_ordinal(): From 3932c16670956f3e3c923ea6f72260bd4fa8f691 Mon Sep 17 00:00:00 2001 From: wendycwong Date: Sat, 10 Feb 2024 19:50:39 -0800 Subject: [PATCH 36/37] Incorporate Tomas F review. --- .../src/main/java/hex/psvm/MetricBuilderPSVM.java | 2 +- .../tree/isofor/MetricBuilderAnomalySupervised.java | 2 +- .../java/ai/h2o/automl/ModelingStepsExecutorTest.java | 2 +- h2o-core/src/main/java/hex/ModelMetricsBinomial.java | 8 +++++++- .../src/main/java/hex/ModelMetricsBinomialGeneric.java | 2 +- .../src/main/java/hex/ModelMetricsMultinomial.java | 7 +++++++ .../java/hex/ModelMetricsMultinomialGLMGeneric.java | 10 ++++++++++ .../main/java/hex/ModelMetricsMultinomialGeneric.java | 2 +- h2o-core/src/main/java/hex/ModelMetricsRegression.java | 5 +++++ .../src/main/java/hex/ModelMetricsRegressionCoxPH.java | 2 +- .../main/java/hex/ModelMetricsRegressionGeneric.java | 2 +- 11 files changed, 36 insertions(+), 8 deletions(-) diff --git a/h2o-algos/src/main/java/hex/psvm/MetricBuilderPSVM.java b/h2o-algos/src/main/java/hex/psvm/MetricBuilderPSVM.java index 58006dac319f..25f0513c49c4 100644 --- a/h2o-algos/src/main/java/hex/psvm/MetricBuilderPSVM.java +++ b/h2o-algos/src/main/java/hex/psvm/MetricBuilderPSVM.java @@ -76,7 +76,7 @@ public ModelMetrics makeModelMetrics(Model m, Frame f, Frame frameWithWeights, F } else { auc = AUC2.emptyAUC(); } - ModelMetricsBinomial mm = new ModelMetricsBinomial(m, f, _count, mse, _domain, sigma, auc, Double.NaN, Double.NaN, Double.NaN, null, _customMetric); + ModelMetricsBinomial mm = new ModelMetricsBinomial(m, f, _count, mse, _domain, sigma, auc, Double.NaN, null, _customMetric); if (m != null) m.addModelMetrics(mm); return mm; } diff --git a/h2o-algos/src/main/java/hex/tree/isofor/MetricBuilderAnomalySupervised.java b/h2o-algos/src/main/java/hex/tree/isofor/MetricBuilderAnomalySupervised.java index c10e4e5ed841..8cd3b0a24e37 100644 --- a/h2o-algos/src/main/java/hex/tree/isofor/MetricBuilderAnomalySupervised.java +++ b/h2o-algos/src/main/java/hex/tree/isofor/MetricBuilderAnomalySupervised.java @@ -35,7 +35,7 @@ public MetricBuilderAnomalySupervised(String[] domain) { auc = AUC2.emptyAUC(); } ModelMetricsBinomial mm = new ModelMetricsBinomial(m, f, _count, mse, _domain, - sigma, auc, logloss, 0, 0, null, _customMetric); + sigma, auc, logloss, null, _customMetric); if (m != null) { m.addModelMetrics(mm); } diff --git a/h2o-automl/src/test/java/ai/h2o/automl/ModelingStepsExecutorTest.java b/h2o-automl/src/test/java/ai/h2o/automl/ModelingStepsExecutorTest.java index cf6292ce63f6..e8a1623f7fa3 100644 --- a/h2o-automl/src/test/java/ai/h2o/automl/ModelingStepsExecutorTest.java +++ b/h2o-automl/src/test/java/ai/h2o/automl/ModelingStepsExecutorTest.java @@ -361,7 +361,7 @@ protected void doSth() { GBMModel.GBMOutput output = new GBMModel.GBMOutput(new GBM(parms)); Model res = new GBMModel(_job._result, parms, output); Frame fr = aml().getTrainingFrame(); - output._training_metrics = new ModelMetricsRegression(res, fr, 1, 1, 1, 1, 1, 1, null, 0, 0); + output._training_metrics = new ModelMetricsRegression(res, fr, 1, 1, 1, 1, 1, 1, null); DKV.put(_job._result, res); } } diff --git a/h2o-core/src/main/java/hex/ModelMetricsBinomial.java b/h2o-core/src/main/java/hex/ModelMetricsBinomial.java index 20374d054e22..1ff8ae9019d8 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsBinomial.java +++ b/h2o-core/src/main/java/hex/ModelMetricsBinomial.java @@ -10,7 +10,6 @@ import water.fvec.Frame; import water.fvec.Vec; import water.util.ArrayUtils; -import water.util.Log; import water.util.MathUtils; import java.util.Arrays; @@ -36,6 +35,13 @@ public ModelMetricsBinomial(Model model, Frame frame, long nobs, double mse, Str _mean_per_class_error = cm() == null ? Double.NaN : cm().mean_per_class_error(); } + public ModelMetricsBinomial(Model model, Frame frame, long nobs, double mse, String[] domain, + double sigma, AUC2 auc, double logloss, GainsLift gainsLift, + CustomMetric customMetric) { + this(model, frame, nobs, mse, domain, sigma, auc, logloss, Double.NaN, Double.NaN, + gainsLift, customMetric); + } + public static ModelMetricsBinomial getFromDKV(Model model, Frame frame) { ModelMetrics mm = ModelMetrics.getFromDKV(model, frame); if( !(mm instanceof ModelMetricsBinomial) ) diff --git a/h2o-core/src/main/java/hex/ModelMetricsBinomialGeneric.java b/h2o-core/src/main/java/hex/ModelMetricsBinomialGeneric.java index 009526c07eec..462372adc04a 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsBinomialGeneric.java +++ b/h2o-core/src/main/java/hex/ModelMetricsBinomialGeneric.java @@ -16,7 +16,7 @@ public ModelMetricsBinomialGeneric(Model model, Frame frame, long nobs, double m CustomMetric customMetric, double mean_per_class_error, TwoDimTable thresholds_and_metric_scores, TwoDimTable max_criteria_and_metric_scores, TwoDimTable confusion_matrix, double r2, final String description) { - super(model, frame, nobs, mse, domain, sigma, auc, logloss, 0, 0, null, customMetric); + super(model, frame, nobs, mse, domain, sigma, auc, logloss, null, customMetric); _gainsLiftTable = gainsLiftTable; _thresholds_and_metric_scores = thresholds_and_metric_scores; _max_criteria_and_metric_scores = max_criteria_and_metric_scores; diff --git a/h2o-core/src/main/java/hex/ModelMetricsMultinomial.java b/h2o-core/src/main/java/hex/ModelMetricsMultinomial.java index 69b17cf9b8d1..06b28163d875 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsMultinomial.java +++ b/h2o-core/src/main/java/hex/ModelMetricsMultinomial.java @@ -36,6 +36,13 @@ public ModelMetricsMultinomial(Model model, Frame frame, long nobs, double mse, _auc = auc; } + public ModelMetricsMultinomial(Model model, Frame frame, long nobs, double mse, String[] domain, double sigma, + ConfusionMatrix cm, float[] hr, double logloss, MultinomialAUC auc, + CustomMetric customMetric) { + this(model, frame, nobs, mse, domain, sigma, cm, hr, logloss, Double.NaN, Double.NaN, auc, customMetric); + + } + @Override public String toString() { StringBuilder sb = new StringBuilder(); diff --git a/h2o-core/src/main/java/hex/ModelMetricsMultinomialGLMGeneric.java b/h2o-core/src/main/java/hex/ModelMetricsMultinomialGLMGeneric.java index 22717a523b8f..ba41e5f457f7 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsMultinomialGLMGeneric.java +++ b/h2o-core/src/main/java/hex/ModelMetricsMultinomialGLMGeneric.java @@ -14,6 +14,16 @@ public class ModelMetricsMultinomialGLMGeneric extends ModelMetricsMultinomialGe public final double _loglikelihood; public final TwoDimTable _coefficients_table; + public ModelMetricsMultinomialGLMGeneric(Model model, Frame frame, long nobs, double mse, String[] domain, double sigma, + TwoDimTable confusion_matrix, TwoDimTable hit_ratio_table, double logloss, CustomMetric customMetric, + double mean_per_class_error, long nullDegreesOfFreedom, long residualDegreesOfFreedom, + double resDev, double nullDev, TwoDimTable coefficients_table, double r2, + TwoDimTable multinomial_auc_table, TwoDimTable multinomial_aucpr_table, MultinomialAucType type, + final String description) { + this(model, frame, nobs, mse, domain, sigma, confusion_matrix, hit_ratio_table, logloss, customMetric, + mean_per_class_error, nullDegreesOfFreedom, residualDegreesOfFreedom, resDev, nullDev, Double.NaN, + coefficients_table, r2, multinomial_auc_table, multinomial_aucpr_table, type, description, Double.NaN); + } public ModelMetricsMultinomialGLMGeneric(Model model, Frame frame, long nobs, double mse, String[] domain, double sigma, TwoDimTable confusion_matrix, TwoDimTable hit_ratio_table, double logloss, CustomMetric customMetric, double mean_per_class_error, long nullDegreesOfFreedom, long residualDegreesOfFreedom, diff --git a/h2o-core/src/main/java/hex/ModelMetricsMultinomialGeneric.java b/h2o-core/src/main/java/hex/ModelMetricsMultinomialGeneric.java index fc273177be50..cddedcffee02 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsMultinomialGeneric.java +++ b/h2o-core/src/main/java/hex/ModelMetricsMultinomialGeneric.java @@ -16,7 +16,7 @@ public ModelMetricsMultinomialGeneric(Model model, Frame frame, long nobs, doubl TwoDimTable confusion_matrix, TwoDimTable hit_ratio_table, double logloss, CustomMetric customMetric, double mean_per_class_error, double r2, TwoDimTable multinomial_auc_table, TwoDimTable multinomial_aucpr_table, MultinomialAucType type, final String description) { - super(model, frame, nobs, mse, domain, sigma, null, null, logloss, 0, 0, null, customMetric); + super(model, frame, nobs, mse, domain, sigma, null, null, logloss, null, customMetric); _confusion_matrix_table = confusion_matrix; _hit_ratio_table = hit_ratio_table; _auc = new MultinomialAUC(multinomial_auc_table, multinomial_aucpr_table, domain, type); diff --git a/h2o-core/src/main/java/hex/ModelMetricsRegression.java b/h2o-core/src/main/java/hex/ModelMetricsRegression.java index 1ccb0d509322..1c2c9bea19f5 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsRegression.java +++ b/h2o-core/src/main/java/hex/ModelMetricsRegression.java @@ -37,6 +37,11 @@ public ModelMetricsRegression(Model model, Frame frame, long nobs, double mse, d _AIC = aic; } + public ModelMetricsRegression(Model model, Frame frame, long nobs, double mse, double sigma, double mae,double rmsle, + double meanResidualDeviance, CustomMetric customMetric) { + this(model, frame, nobs, mse, sigma, mae, rmsle, meanResidualDeviance, customMetric, Double.NaN, Double.NaN); + } + public static ModelMetricsRegression getFromDKV(Model model, Frame frame) { ModelMetrics mm = ModelMetrics.getFromDKV(model, frame); diff --git a/h2o-core/src/main/java/hex/ModelMetricsRegressionCoxPH.java b/h2o-core/src/main/java/hex/ModelMetricsRegressionCoxPH.java index 03e7cd0cf608..28a1dfa0b949 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsRegressionCoxPH.java +++ b/h2o-core/src/main/java/hex/ModelMetricsRegressionCoxPH.java @@ -34,7 +34,7 @@ public class ModelMetricsRegressionCoxPH extends ModelMetricsRegression { public ModelMetricsRegressionCoxPH(Model model, Frame frame, long nobs, double mse, double sigma, double mae, double rmsle, double meanResidualDeviance, CustomMetric customMetric, double concordance, long concordant, long discordant, long tied_y) { - super(model, frame, nobs, mse, sigma, mae, rmsle, meanResidualDeviance, customMetric, 0, 0); + super(model, frame, nobs, mse, sigma, mae, rmsle, meanResidualDeviance, customMetric); this._concordance = concordance; this._concordant = concordant; diff --git a/h2o-core/src/main/java/hex/ModelMetricsRegressionGeneric.java b/h2o-core/src/main/java/hex/ModelMetricsRegressionGeneric.java index 8add2c06fb8e..6715df012a75 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsRegressionGeneric.java +++ b/h2o-core/src/main/java/hex/ModelMetricsRegressionGeneric.java @@ -6,7 +6,7 @@ public class ModelMetricsRegressionGeneric extends ModelMetricsRegression { public ModelMetricsRegressionGeneric(Model model, Frame frame, long nobs, double mse, double sigma, double mae, double rmsle, double meanResidualDeviance, CustomMetric customMetric, String description) { - super(model, frame, nobs, mse, sigma, mae, rmsle, meanResidualDeviance, customMetric, 0, 0); + super(model, frame, nobs, mse, sigma, mae, rmsle, meanResidualDeviance, customMetric); _description = description; } } From 6421a819a0b634c331a06b6bbfa5838cd72b6f23 Mon Sep 17 00:00:00 2001 From: wendycwong Date: Sat, 10 Feb 2024 20:17:33 -0800 Subject: [PATCH 37/37] replace m != null && m.getClass().toString().contains(generic) with score4Generic --- h2o-core/src/main/java/hex/ModelMetricsBinomial.java | 3 ++- h2o-core/src/main/java/hex/ModelMetricsMultinomial.java | 3 ++- h2o-core/src/main/java/hex/ModelMetricsRegression.java | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/h2o-core/src/main/java/hex/ModelMetricsBinomial.java b/h2o-core/src/main/java/hex/ModelMetricsBinomial.java index 1ff8ae9019d8..029a68388ef5 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsBinomial.java +++ b/h2o-core/src/main/java/hex/ModelMetricsBinomial.java @@ -192,6 +192,7 @@ public static class MetricBuilderBinomial> ex if(w == 0 || Double.isNaN(w)) return ds; int iact = (int)yact[0]; boolean quasibinomial = (m!=null && m._parms._distribution == DistributionFamily.quasibinomial); + boolean score4Generic = m != null && m.getClass().toString().contains("Generic"); if (quasibinomial) { if (yact[0] != 0) iact = _domain[0].equals(String.valueOf((int) yact[0])) ? 0 : 1; // actual response index needed for confusion matrix, AUC, etc. @@ -213,7 +214,7 @@ public static class MetricBuilderBinomial> ex _logloss += w * MathUtils.logloss(err); } - if(m != null && m.getClass().toString().contains("Generic")) { + if(score4Generic) { // only perform for generic model, will increase run time for training if performs _loglikelihood += m.likelihood(w, yact[0], ds); } diff --git a/h2o-core/src/main/java/hex/ModelMetricsMultinomial.java b/h2o-core/src/main/java/hex/ModelMetricsMultinomial.java index 06b28163d875..a15e33b2d6ea 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsMultinomial.java +++ b/h2o-core/src/main/java/hex/ModelMetricsMultinomial.java @@ -294,6 +294,7 @@ public MetricBuilderMultinomial( int nclasses, String[] domain, MultinomialAucTy if(ArrayUtils.hasNaNs(ds)) return ds; if(w == 0 || Double.isNaN(w)) return ds; final int iact = (int)yact[0]; + boolean score4Generic = m != null && m.getClass().toString().contains("Generic"); _count++; _wcount += w; _wY += w*iact; @@ -322,7 +323,7 @@ public MetricBuilderMultinomial( int nclasses, String[] domain, MultinomialAucTy } - if(m != null && m.getClass().toString().contains("Generic")) { + if(score4Generic) { // only perform for generic model, will increase run time for training if perform _loglikelihood += m.likelihood(w, yact[0], ds); } return ds; // Flow coding diff --git a/h2o-core/src/main/java/hex/ModelMetricsRegression.java b/h2o-core/src/main/java/hex/ModelMetricsRegression.java index 1c2c9bea19f5..d137911d3ca4 100755 --- a/h2o-core/src/main/java/hex/ModelMetricsRegression.java +++ b/h2o-core/src/main/java/hex/ModelMetricsRegression.java @@ -143,6 +143,7 @@ public MetricBuilderRegression(Distribution dist) { // ds[0] has the prediction and ds[1,..,N] is ignored @Override public double[] perRow(double ds[], float[] yact, Model m) {return perRow(ds, yact, 1, 0, m);} @Override public double[] perRow(double ds[], float[] yact, double w, double o, Model m) { + boolean score4Generic = m != null && m.getClass().toString().contains("Generic"); if( Float.isNaN(yact[0]) ) return ds; // No errors if actual is missing if(ArrayUtils.hasNaNs(ds)) return ds; // No errors if prediction has missing values (can happen for GLM) if(w == 0 || Double.isNaN(w)) return ds; @@ -163,7 +164,7 @@ public MetricBuilderRegression(Distribution dist) { } } - if(m != null && m.getClass().toString().contains("Generic")) { + if(score4Generic) { // only perform for generic model, will increase run time for training if performs _loglikelihood += m.likelihood(w, yact[0], ds); }