Fix failed tests

h2oai · Jan 17, 2025 · 2e09300 · 2e09300
1 parent d19bd44
commit 2e09300
Show file tree

Hide file tree

Showing 9 changed files with 75 additions and 25 deletions.
diff --git a/h2o-algos/src/main/java/hex/knn/KNN.java b/h2o-algos/src/main/java/hex/knn/KNN.java
@@ -49,7 +49,6 @@ class KNNDriver extends Driver {
         public void computeImpl() {
             KNNModel model = null;
             Frame result = new Frame(Key.make("KNN_distances"));
-            Frame tmpResult = null;
             try {
                 init(true);   // Initialize parameters
                 if (error_count() > 0) {
@@ -72,12 +71,15 @@ public void computeImpl() {
                         query[j] = train.vec(j).chunkForChunkIdx(i).deepCopy();
                     }
                     KNNDistanceTask task = new KNNDistanceTask(_parms._k, query, KNNDistanceFactory.createDistance(_parms._distance), idColumnIndex, idColumn, idType, responseColumnIndex, responseColumn);
-                    tmpResult = task.doAll(train).outputFrame();
+                    Frame tmpResult = task.doAll(train).outputFrame();
+                    Scope.untrack(tmpResult);
+
                     // merge result from a chunk
                     result = result.add(tmpResult);
                 }
-                DKV.put(result._key, result);
-                model._output.setDistancesKey(result._key);
+                Key<Frame> key = result._key;
+                DKV.put(key, result);
+                model._output.setDistancesKey(key);
                 Scope.untrack(result);
 
                 model.update(_job);
@@ -90,9 +92,6 @@ public void computeImpl() {
                 if (model != null) {
                     model.unlock(_job);
                 }
-                if (tmpResult != null) {
-                    tmpResult.remove();
-                }
             }
         }
     }

diff --git a/h2o-algos/src/test/java/hex/knn/KNNTest.java b/h2o-algos/src/test/java/hex/knn/KNNTest.java
@@ -55,9 +55,7 @@ public void testIris() {
             ModelMetricsMultinomial mm1 = (ModelMetricsMultinomial) knn._output._training_metrics;
             Assert.assertEquals(mm.auc(), mm1.auc(), 0);
 
-            // test after KNN API will be ready
-            //knn.testJavaScoring(fr, preds, 0);
-
+            knn.testJavaScoring(fr, preds, 0);
         } finally {
             if (knn != null){
                 knn.delete();

diff --git a/h2o-bindings/bin/custom/R/gen_knn.py b/h2o-bindings/bin/custom/R/gen_knn.py
@@ -0,0 +1,37 @@
+extensions = dict(
+    extra_params=[('verbose', 'FALSE')],
+    required_params=['x', 'y', 'training_frame', 'id_column', 'response_column'],
+    skip_default_set_params_for=['training_frame', 'ignored_columns', 'response_column', 'offset_column'],
+    set_required_params="""
+parms$training_frame <- training_frame
+args <- .verify_dataxy(training_frame, x, y)
+if (!missing(id_column)) {
+  parms$id_column <- id_column
+} else {
+  stop("ID column is required.")  
+}
+parms$ignored_columns <- args$x_ignore
+parms$response_column <- args$y
+"""
+)
+
+
+doc = dict(
+    preamble="""
+Build a KNN model
+
+Builds a K-nearest neighbour model on an H2OFrame.
+""",
+    params=dict(
+        verbose="""
+\code{Logical}. Print scoring history to the console. Defaults to FALSE.
+"""
+    ),
+    returns="""
+Creates a \linkS4class{H2OModel} object of the right type.
+""",
+    seealso="""
+\code{\link{predict.H2OModel}} for prediction
+""",
+    examples=""""""
+)
diff --git a/h2o-py/tests/testdir_sklearn/pyunit_sklearn_classification_all_estimators.py b/h2o-py/tests/testdir_sklearn/pyunit_sklearn_classification_all_estimators.py
@@ -149,7 +149,7 @@ def make_tests(classifier):
 
 
 failing = [
-    'H2OStackedEnsembleClassifier', 'H2OUpliftRandomForestClassifier'  # needs a separate test (requires models as parameters)
+    'H2OStackedEnsembleClassifier', 'H2OUpliftRandomForestClassifier', 'H2OKnnClassifier'  # need separate tests (e.g. StackedEnsemble requires models as parameters, KNN requires an id_column)
 ]
 classifiers = [cls for name, cls in inspect.getmembers(h2o.sklearn, inspect.isclass)
                if name.endswith('Classifier') and name not in ['H2OAutoMLClassifier']+failing]

diff --git a/h2o-py/tests/testdir_sklearn/pyunit_sklearn_generic_all_estimators.py b/h2o-py/tests/testdir_sklearn/pyunit_sklearn_generic_all_estimators.py
@@ -200,6 +200,7 @@ def make_tests(classifier):
     'H2OUpliftRandomForestEstimator',  # generic part is not implemented yet
     'H2ODecisionTreeEstimator',  # generic part is not implemented yet
     'H2OAdaBoostEstimator',  # generic part is not implemented yet or test needs to be adjusted just for classification
+    'H2OKnnEstimator'  # generic part is not implemented yet
 ]
 estimators = [cls for name, cls in inspect.getmembers(h2o.sklearn, inspect.isclass)
               if name.endswith('Estimator') and name not in ['H2OAutoMLEstimator'] + failing]

diff --git a/h2o-py/tests/testdir_sklearn/pyunit_sklearn_regression_all_estimators.py b/h2o-py/tests/testdir_sklearn/pyunit_sklearn_regression_all_estimators.py
@@ -139,7 +139,8 @@ def make_tests(classifier):
     'H2OStackedEnsembleRegressor',  # needs a separate test (requires models as parameters),
     'H2OUpliftRandomForestRegressor',  # does not support regression yet
     'H2ODecisionTreeRegressor',  # does not support regression yet
-    'H2OAdaBoostRegressor'  # does not support regression yet
+    'H2OAdaBoostRegressor',  # does not support regression yet
+    'H2OKnnRegressor'  # does not support regression
 ]
 regressors = [cls for name, cls in inspect.getmembers(h2o.sklearn, inspect.isclass)
               if name.endswith('Regressor') and name not in ['H2OAutoMLRegressor']+failing]

diff --git a/h2o-py/tests_rest_smoke/testdir_multi_jvm/test_rest_api.py b/h2o-py/tests_rest_smoke/testdir_multi_jvm/test_rest_api.py
@@ -23,7 +23,7 @@
 algos = ['coxph', 'kmeans', 'deeplearning', 'drf', 'glm', 'gbm', 'pca', 'naivebayes', 'glrm', 'svd', 'isotonicregression',
          'psvm', 'aggregator', 'word2vec', 'stackedensemble', 'xgboost', 'isolationforest', 'gam',
          'generic', 'targetencoder', 'rulefit', 'extendedisolationforest', 'anovaglm', 'modelselection',
-         'upliftdrf', 'infogram', 'dt', 'adaboost', 'hglm']
+         'upliftdrf', 'infogram', 'dt', 'adaboost', 'hglm', 'knn']
 
 algo_additional_default_params = { 'grep' : { 'regex' : '.*' },
                                    'kmeans' : { 'k' : 2 },

diff --git a/h2o-r/h2o-package/R/knn.R b/h2o-r/h2o-package/R/knn.R
@@ -3,14 +3,18 @@
 #
 # -------------------------- knn -------------------------- #
 #'
+#' Build a KNN model
+#' 
+#' Builds a K-nearest neighbour model on an H2OFrame.
+#'
 #' @param x (Optional) A vector containing the names or indices of the predictor variables to use in building the model.
 #'        If x is missing, then all columns except y are used.
 #' @param y The name or column index of the response variable in the data. 
 #'        The response must be either a numeric or a categorical/factor variable. 
 #'        If the response is numeric, then a regression model will be trained, otherwise it will train a classification model.
 #' @param training_frame Id of the training data frame.
-#' @param model_id Destination id for this model; auto-generated if not specified.
 #' @param id_column Identify each record column.
+#' @param model_id Destination id for this model; auto-generated if not specified.
 #' @param ignore_const_cols \code{Logical}. Ignore constant columns. Defaults to TRUE.
 #' @param seed Seed for random numbers (affects certain parts of the algo that are stochastic and those might or might not be enabled by default).
 #'        Defaults to -1 (time-based random number).
@@ -25,12 +29,16 @@
 #'        "WEIGHTED_OVO". Defaults to AUTO.
 #' @param k Number of nearest neighbours Defaults to 3.
 #' @param distance Distance type Must be one of: "AUTO", "euclidean", "manhattan", "cosine".
+#' @param verbose \code{Logical}. Print scoring history to the console. Defaults to FALSE.
+#' @return Creates a \linkS4class{H2OModel} object of the right type.
+#' @seealso \code{\link{predict.H2OModel}} for prediction
 #' @export
 h2o.knn <- function(x,
                     y,
                     training_frame,
+                    id_column,
+                    response_column,
                     model_id = NULL,
-                    id_column = NULL,
                     ignore_const_cols = TRUE,
                     seed = -1,
                     max_runtime_secs = 0,
@@ -40,7 +48,8 @@ h2o.knn <- function(x,
                     gainslift_bins = -1,
                     auc_type = c("AUTO", "NONE", "MACRO_OVR", "WEIGHTED_OVR", "MACRO_OVO", "WEIGHTED_OVO"),
                     k = 3,
-                    distance = c("AUTO", "euclidean", "manhattan", "cosine"))
+                    distance = c("AUTO", "euclidean", "manhattan", "cosine"),
+                    verbose = FALSE)
 {
   # Validate required training_frame first and other frame args: should be a valid key or an H2OFrame object
   training_frame <- .validate.H2OFrame(training_frame, required=TRUE)
@@ -59,9 +68,11 @@ h2o.knn <- function(x,
   parms <- list()
   parms$training_frame <- training_frame
   args <- .verify_dataxy(training_frame, x, y)
-  if( !missing(offset_column) && !is.null(offset_column))  args$x_ignore <- args$x_ignore[!( offset_column == args$x_ignore )]
-  if( !missing(weights_column) && !is.null(weights_column)) args$x_ignore <- args$x_ignore[!( weights_column == args$x_ignore )]
-  if( !missing(fold_column) && !is.null(fold_column)) args$x_ignore <- args$x_ignore[!( fold_column == args$x_ignore )]
+  if (!missing(id_column)) {
+    parms$id_column <- id_column
+  } else {
+    stop("ID column is required.")  
+  }
   parms$ignored_columns <- args$x_ignore
   parms$response_column <- args$y
 
@@ -91,13 +102,14 @@ h2o.knn <- function(x,
     parms$distance <- distance
 
   # Error check and build model
-  model <- .h2o.modelJob('knn', parms, h2oRestApiVersion=3, verbose=FALSE)
+  model <- .h2o.modelJob('knn', parms, h2oRestApiVersion=3, verbose=verbose)
   return(model)
 }
 .h2o.train_segments_knn <- function(x,
                                     y,
                                     training_frame,
-                                    id_column = NULL,
+                                    id_column,
+                                    response_column,
                                     ignore_const_cols = TRUE,
                                     seed = -1,
                                     max_runtime_secs = 0,
@@ -133,9 +145,11 @@ h2o.knn <- function(x,
   parms <- list()
   parms$training_frame <- training_frame
   args <- .verify_dataxy(training_frame, x, y)
-  if( !missing(offset_column) && !is.null(offset_column))  args$x_ignore <- args$x_ignore[!( offset_column == args$x_ignore )]
-  if( !missing(weights_column) && !is.null(weights_column)) args$x_ignore <- args$x_ignore[!( weights_column == args$x_ignore )]
-  if( !missing(fold_column) && !is.null(fold_column)) args$x_ignore <- args$x_ignore[!( fold_column == args$x_ignore )]
+  if (!missing(id_column)) {
+    parms$id_column <- id_column
+  } else {
+    stop("ID column is required.")  
+  }
   parms$ignored_columns <- args$x_ignore
   parms$response_column <- args$y
 

diff --git a/h2o-r/tests/testdir_algos/knn/runit_knn_smoke.R b/h2o-r/tests/testdir_algos/knn/runit_knn_smoke.R
@@ -5,7 +5,7 @@ source("../../../scripts/h2o-r-test-setup.R")
 
 knn.smoke <- function() {
     iris.hex <- h2o.uploadFile( locate("smalldata/iris/iris.csv"))
-    iris.knn <-  h2o.knn(x=1:4, training_frame=iris.hex, k = 3, distance="euclidean", seed = 1234)
+    iris.knn <-  h2o.knn(x=1:4, y=5, training_frame=iris.hex, k=3 , distance="euclidean", seed=1234)
 
     # Score test data with different default auc_type (previous was "NONE", so no AUC calculation)
     perf <- h2o.performance(iris.knn, test.hex, auc_type="WEIGHTED_OVO")