Skip to content

Commit

Permalink
Fix failed tests
Browse files Browse the repository at this point in the history
  • Loading branch information
maurever committed Jan 17, 2025
1 parent d19bd44 commit 2e09300
Show file tree
Hide file tree
Showing 9 changed files with 75 additions and 25 deletions.
13 changes: 6 additions & 7 deletions h2o-algos/src/main/java/hex/knn/KNN.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ class KNNDriver extends Driver {
public void computeImpl() {
KNNModel model = null;
Frame result = new Frame(Key.make("KNN_distances"));
Frame tmpResult = null;
try {
init(true); // Initialize parameters
if (error_count() > 0) {
Expand All @@ -72,12 +71,15 @@ public void computeImpl() {
query[j] = train.vec(j).chunkForChunkIdx(i).deepCopy();
}
KNNDistanceTask task = new KNNDistanceTask(_parms._k, query, KNNDistanceFactory.createDistance(_parms._distance), idColumnIndex, idColumn, idType, responseColumnIndex, responseColumn);
tmpResult = task.doAll(train).outputFrame();
Frame tmpResult = task.doAll(train).outputFrame();
Scope.untrack(tmpResult);

// merge result from a chunk
result = result.add(tmpResult);
}
DKV.put(result._key, result);
model._output.setDistancesKey(result._key);
Key<Frame> key = result._key;
DKV.put(key, result);
model._output.setDistancesKey(key);
Scope.untrack(result);

model.update(_job);
Expand All @@ -90,9 +92,6 @@ public void computeImpl() {
if (model != null) {
model.unlock(_job);
}
if (tmpResult != null) {
tmpResult.remove();
}
}
}
}
Expand Down
4 changes: 1 addition & 3 deletions h2o-algos/src/test/java/hex/knn/KNNTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,7 @@ public void testIris() {
ModelMetricsMultinomial mm1 = (ModelMetricsMultinomial) knn._output._training_metrics;
Assert.assertEquals(mm.auc(), mm1.auc(), 0);

// test after KNN API will be ready
//knn.testJavaScoring(fr, preds, 0);

knn.testJavaScoring(fr, preds, 0);
} finally {
if (knn != null){
knn.delete();
Expand Down
37 changes: 37 additions & 0 deletions h2o-bindings/bin/custom/R/gen_knn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
extensions = dict(
extra_params=[('verbose', 'FALSE')],
required_params=['x', 'y', 'training_frame', 'id_column', 'response_column'],
skip_default_set_params_for=['training_frame', 'ignored_columns', 'response_column', 'offset_column'],
set_required_params="""
parms$training_frame <- training_frame
args <- .verify_dataxy(training_frame, x, y)
if (!missing(id_column)) {
parms$id_column <- id_column
} else {
stop("ID column is required.")
}
parms$ignored_columns <- args$x_ignore
parms$response_column <- args$y
"""
)


doc = dict(
preamble="""
Build a KNN model
Builds a K-nearest neighbour model on an H2OFrame.
""",
params=dict(
verbose="""
\code{Logical}. Print scoring history to the console. Defaults to FALSE.
"""
),
returns="""
Creates a \linkS4class{H2OModel} object of the right type.
""",
seealso="""
\code{\link{predict.H2OModel}} for prediction
""",
examples=""""""
)
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def make_tests(classifier):


failing = [
'H2OStackedEnsembleClassifier', 'H2OUpliftRandomForestClassifier' # needs a separate test (requires models as parameters)
'H2OStackedEnsembleClassifier', 'H2OUpliftRandomForestClassifier', 'H2OKnnClassifier' # needs a separate test (requires models as parameters)
]
classifiers = [cls for name, cls in inspect.getmembers(h2o.sklearn, inspect.isclass)
if name.endswith('Classifier') and name not in ['H2OAutoMLClassifier']+failing]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ def make_tests(classifier):
'H2OUpliftRandomForestEstimator', # generic part is not implemented yet
'H2ODecisionTreeEstimator', # generic part is not implemented yet
'H2OAdaBoostEstimator', # generic part is not implemented yet or test needs to be adjusted just for classification
'H2OKnnEstimator' # generic part is not implemented yet
]
estimators = [cls for name, cls in inspect.getmembers(h2o.sklearn, inspect.isclass)
if name.endswith('Estimator') and name not in ['H2OAutoMLEstimator'] + failing]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,8 @@ def make_tests(classifier):
'H2OStackedEnsembleRegressor', # needs a separate test (requires models as parameters),
'H2OUpliftRandomForestRegressor', # does not support regression yet
'H2ODecisionTreeRegressor', # does not support regression yet
'H2OAdaBoostRegressor' # does not support regression yet
'H2OAdaBoostRegressor', # does not support regression yet
'H2OKnnRegressor' # does not support regression
]
regressors = [cls for name, cls in inspect.getmembers(h2o.sklearn, inspect.isclass)
if name.endswith('Regressor') and name not in ['H2OAutoMLRegressor']+failing]
Expand Down
2 changes: 1 addition & 1 deletion h2o-py/tests_rest_smoke/testdir_multi_jvm/test_rest_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
algos = ['coxph', 'kmeans', 'deeplearning', 'drf', 'glm', 'gbm', 'pca', 'naivebayes', 'glrm', 'svd', 'isotonicregression',
'psvm', 'aggregator', 'word2vec', 'stackedensemble', 'xgboost', 'isolationforest', 'gam',
'generic', 'targetencoder', 'rulefit', 'extendedisolationforest', 'anovaglm', 'modelselection',
'upliftdrf', 'infogram', 'dt', 'adaboost', 'hglm']
'upliftdrf', 'infogram', 'dt', 'adaboost', 'hglm', 'knn']

algo_additional_default_params = { 'grep' : { 'regex' : '.*' },
'kmeans' : { 'k' : 2 },
Expand Down
36 changes: 25 additions & 11 deletions h2o-r/h2o-package/R/knn.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,18 @@
#
# -------------------------- knn -------------------------- #
#'
#' Build a KNN model
#'
#' Builds a K-nearest neighbour model on an H2OFrame.
#'
#' @param x (Optional) A vector containing the names or indices of the predictor variables to use in building the model.
#' If x is missing, then all columns except y are used.
#' @param y The name or column index of the response variable in the data.
#' The response must be either a numeric or a categorical/factor variable.
#' If the response is numeric, then a regression model will be trained, otherwise it will train a classification model.
#' @param training_frame Id of the training data frame.
#' @param model_id Destination id for this model; auto-generated if not specified.
#' @param id_column Identify each record column.
#' @param model_id Destination id for this model; auto-generated if not specified.
#' @param ignore_const_cols \code{Logical}. Ignore constant columns. Defaults to TRUE.
#' @param seed Seed for random numbers (affects certain parts of the algo that are stochastic and those might or might not be enabled by default).
#' Defaults to -1 (time-based random number).
Expand All @@ -25,12 +29,16 @@
#' "WEIGHTED_OVO". Defaults to AUTO.
#' @param k Number of nearest neighbours Defaults to 3.
#' @param distance Distance type Must be one of: "AUTO", "euclidean", "manhattan", "cosine".
#' @param verbose \code{Logical}. Print scoring history to the console. Defaults to FALSE.
#' @return Creates a \linkS4class{H2OModel} object of the right type.
#' @seealso \code{\link{predict.H2OModel}} for prediction
#' @export
h2o.knn <- function(x,
y,
training_frame,
id_column,
response_column,
model_id = NULL,
id_column = NULL,
ignore_const_cols = TRUE,
seed = -1,
max_runtime_secs = 0,
Expand All @@ -40,7 +48,8 @@ h2o.knn <- function(x,
gainslift_bins = -1,
auc_type = c("AUTO", "NONE", "MACRO_OVR", "WEIGHTED_OVR", "MACRO_OVO", "WEIGHTED_OVO"),
k = 3,
distance = c("AUTO", "euclidean", "manhattan", "cosine"))
distance = c("AUTO", "euclidean", "manhattan", "cosine"),
verbose = FALSE)
{
# Validate required training_frame first and other frame args: should be a valid key or an H2OFrame object
training_frame <- .validate.H2OFrame(training_frame, required=TRUE)
Expand All @@ -59,9 +68,11 @@ h2o.knn <- function(x,
parms <- list()
parms$training_frame <- training_frame
args <- .verify_dataxy(training_frame, x, y)
if( !missing(offset_column) && !is.null(offset_column)) args$x_ignore <- args$x_ignore[!( offset_column == args$x_ignore )]
if( !missing(weights_column) && !is.null(weights_column)) args$x_ignore <- args$x_ignore[!( weights_column == args$x_ignore )]
if( !missing(fold_column) && !is.null(fold_column)) args$x_ignore <- args$x_ignore[!( fold_column == args$x_ignore )]
if (!missing(id_column)) {
parms$id_column <- id_column
} else {
stop("ID column is required.")
}
parms$ignored_columns <- args$x_ignore
parms$response_column <- args$y

Expand Down Expand Up @@ -91,13 +102,14 @@ h2o.knn <- function(x,
parms$distance <- distance

# Error check and build model
model <- .h2o.modelJob('knn', parms, h2oRestApiVersion=3, verbose=FALSE)
model <- .h2o.modelJob('knn', parms, h2oRestApiVersion=3, verbose=verbose)
return(model)
}
.h2o.train_segments_knn <- function(x,
y,
training_frame,
id_column = NULL,
id_column,
response_column,
ignore_const_cols = TRUE,
seed = -1,
max_runtime_secs = 0,
Expand Down Expand Up @@ -133,9 +145,11 @@ h2o.knn <- function(x,
parms <- list()
parms$training_frame <- training_frame
args <- .verify_dataxy(training_frame, x, y)
if( !missing(offset_column) && !is.null(offset_column)) args$x_ignore <- args$x_ignore[!( offset_column == args$x_ignore )]
if( !missing(weights_column) && !is.null(weights_column)) args$x_ignore <- args$x_ignore[!( weights_column == args$x_ignore )]
if( !missing(fold_column) && !is.null(fold_column)) args$x_ignore <- args$x_ignore[!( fold_column == args$x_ignore )]
if (!missing(id_column)) {
parms$id_column <- id_column
} else {
stop("ID column is required.")
}
parms$ignored_columns <- args$x_ignore
parms$response_column <- args$y

Expand Down
2 changes: 1 addition & 1 deletion h2o-r/tests/testdir_algos/knn/runit_knn_smoke.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ source("../../../scripts/h2o-r-test-setup.R")

knn.smoke <- function() {
iris.hex <- h2o.uploadFile( locate("smalldata/iris/iris.csv"))
iris.knn <- h2o.knn(x=1:4, training_frame=iris.hex, k = 3, distance="euclidean", seed = 1234)
iris.knn <- h2o.knn(x=1:4, y=5, training_frame=iris.hex, k=3 , distance="euclidean", seed=1234)

# Score test data with different default auc_type (previous was "NONE", so no AUC calculation)
perf <- h2o.performance(iris.knn, test.hex, auc_type="WEIGHTED_OVO")
Expand Down

0 comments on commit 2e09300

Please sign in to comment.