Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-16026: remove custom_metric_func and nparallelism from schema. #16027

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion h2o-algos/src/main/java/hex/anovaglm/ANOVAGLMModel.java
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ public static class ANOVAGLMParameters extends Model.Parameters {
public int _nfolds = 0; // disable cross-validation
public Key<Frame> _plug_values = null;
public boolean _save_transformed_framekeys = false; // for debugging, save the transformed predictors/interaction
public int _nparallelism = 4;
public int _nparallelism = H2O.NUMCPUS;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not H2O.ARGS.nthreads?

I'm not suggesting that nthreads would be better, but I know you can specify nthreads in h2o.init(...), so I am curious when we should use NUMCPUS and when we should use nthreads. NUMCPUS can be set via the sys.ai.h2o.activeProcessorCount property, which seems harder than setting nthreads.

If we don't use nthreads for parallelism, does it make sense to expose it in h2o.init?


@Override
public String algoName() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,12 +167,6 @@ private void initModelSelectionParameters() {
_numPredictors + ")in the dataset.");
}

if (_parms._nparallelism < 0)
error("nparallelism", "must be >= 0.");

if (_parms._nparallelism == 0)
_parms._nparallelism = H2O.NUMCPUS;

if (maxrsweep.equals(_parms._mode))
warn("validation_frame", " is not used in choosing the best k subset for ModelSelection" +
" models with maxrsweep.");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ public static class ModelSelectionParameters extends Model.Parameters {
public Key<Frame> _plug_values = null;
public int _max_predictor_number = 1;
public int _min_predictor_number = 1;
public int _nparallelism = 0;
public int _nparallelism = H2O.NUMCPUS; // adaptive to the system it is run on.
public double _p_values_threshold = 0;
public double _tweedie_variance_power;
public double _tweedie_link_power;
Expand Down
4 changes: 0 additions & 4 deletions h2o-algos/src/main/java/hex/schemas/ANOVAGLMV3.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ public static final class ANOVAGLMParametersV3 extends ModelParametersSchemaV3<A
"max_runtime_secs",
"save_transformed_framekeys",
"highest_interaction_term",
"nparallelism",
"type" // GLM SS Type, only support 3 right now
};

Expand Down Expand Up @@ -157,8 +156,5 @@ public static final class ANOVAGLMParametersV3 extends ModelParametersSchemaV3<A

@API(help="true to save the keys of transformed predictors and interaction column.")
public boolean save_transformed_framekeys;

@API(help="Number of models to build in parallel. Default to 4. Adjust according to your system.")
public int nparallelism;
}
}
6 changes: 0 additions & 6 deletions h2o-algos/src/main/java/hex/schemas/ModelSelectionV3.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,6 @@ public static final class ModelSelectionParametersV3 extends ModelParametersSche
"max_after_balance_size",
"max_confusion_matrix_size",
"max_runtime_secs",
"custom_metric_func",
"nparallelism",
"max_predictor_number", // denote maximum number of predictors to build models for
"min_predictor_number",
"mode", // naive, maxr, maxrsweep, backward
Expand Down Expand Up @@ -279,10 +277,6 @@ public static final class ModelSelectionParametersV3 extends ModelParametersSche
level = API.Level.secondary, direction = API.Direction.INPUT)
public int min_predictor_number;

@API(help = "number of models to build in parallel. Defaults to 0.0 which is adaptive to the system capability",
level = API.Level.secondary, gridable = true)
public int nparallelism;

@API(help = "For mode='backward' only. If specified, will stop the model building process when all coefficients" +
"p-values drop below this threshold ", level = API.Level.expert)
public double p_values_threshold;
Expand Down
19 changes: 0 additions & 19 deletions h2o-py/h2o/estimators/anovaglm.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ def __init__(self,
max_runtime_secs=0.0, # type: float
save_transformed_framekeys=False, # type: bool
highest_interaction_term=0, # type: int
nparallelism=4, # type: int
type=0, # type: int
):
"""
Expand Down Expand Up @@ -201,9 +200,6 @@ def __init__(self,
only, 3 for three columns and so on... Default to 2.
Defaults to ``0``.
:type highest_interaction_term: int
:param nparallelism: Number of models to build in parallel. Default to 4. Adjust according to your system.
Defaults to ``4``.
:type nparallelism: int
:param type: Refer to the SS type 1, 2, 3, or 4. We are currently only supporting 3
Defaults to ``0``.
:type type: int
Expand Down Expand Up @@ -245,7 +241,6 @@ def __init__(self,
self.max_runtime_secs = max_runtime_secs
self.save_transformed_framekeys = save_transformed_framekeys
self.highest_interaction_term = highest_interaction_term
self.nparallelism = nparallelism
self.type = type
self._parms["_rest_version"] = 3

Expand Down Expand Up @@ -745,20 +740,6 @@ def highest_interaction_term(self, highest_interaction_term):
assert_is_type(highest_interaction_term, None, int)
self._parms["highest_interaction_term"] = highest_interaction_term

@property
def nparallelism(self):
"""
Number of models to build in parallel. Default to 4. Adjust according to your system.

Type: ``int``, defaults to ``4``.
"""
return self._parms.get("nparallelism")

@nparallelism.setter
def nparallelism(self, nparallelism):
assert_is_type(nparallelism, None, int)
self._parms["nparallelism"] = nparallelism

@property
def type(self):
"""
Expand Down
39 changes: 0 additions & 39 deletions h2o-py/h2o/estimators/model_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,6 @@ def __init__(self,
max_after_balance_size=5.0, # type: float
max_confusion_matrix_size=20, # type: int
max_runtime_secs=0.0, # type: float
custom_metric_func=None, # type: Optional[str]
nparallelism=0, # type: int
max_predictor_number=1, # type: int
min_predictor_number=1, # type: int
mode="maxr", # type: Literal["allsubsets", "maxr", "maxrsweep", "backward"]
Expand Down Expand Up @@ -290,13 +288,6 @@ def __init__(self,
:param max_runtime_secs: Maximum allowed runtime in seconds for model training. Use 0 to disable.
Defaults to ``0.0``.
:type max_runtime_secs: float
:param custom_metric_func: Reference to custom evaluation function, format: `language:keyName=funcName`
Defaults to ``None``.
:type custom_metric_func: str, optional
:param nparallelism: number of models to build in parallel. Defaults to 0.0 which is adaptive to the system
capability
Defaults to ``0``.
:type nparallelism: int
:param max_predictor_number: Maximum number of predictors to be considered when building GLM models. Defaults
to 1.
Defaults to ``1``.
Expand Down Expand Up @@ -383,8 +374,6 @@ def __init__(self,
self.max_after_balance_size = max_after_balance_size
self.max_confusion_matrix_size = max_confusion_matrix_size
self.max_runtime_secs = max_runtime_secs
self.custom_metric_func = custom_metric_func
self.nparallelism = nparallelism
self.max_predictor_number = max_predictor_number
self.min_predictor_number = min_predictor_number
self.mode = mode
Expand Down Expand Up @@ -1124,34 +1113,6 @@ def max_runtime_secs(self, max_runtime_secs):
assert_is_type(max_runtime_secs, None, numeric)
self._parms["max_runtime_secs"] = max_runtime_secs

@property
def custom_metric_func(self):
"""
Reference to custom evaluation function, format: `language:keyName=funcName`

Type: ``str``.
"""
return self._parms.get("custom_metric_func")

@custom_metric_func.setter
def custom_metric_func(self, custom_metric_func):
assert_is_type(custom_metric_func, None, str)
self._parms["custom_metric_func"] = custom_metric_func

@property
def nparallelism(self):
"""
number of models to build in parallel. Defaults to 0.0 which is adaptive to the system capability

Type: ``int``, defaults to ``0``.
"""
return self._parms.get("nparallelism")

@nparallelism.setter
def nparallelism(self, nparallelism):
assert_is_type(nparallelism, None, int)
self._parms["nparallelism"] = nparallelism

@property
def max_predictor_number(self):
"""
Expand Down
7 changes: 0 additions & 7 deletions h2o-r/h2o-package/R/anovaglm.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@
#' @param save_transformed_framekeys \code{Logical}. true to save the keys of transformed predictors and interaction column. Defaults to FALSE.
#' @param highest_interaction_term Limit the number of interaction terms, if 2 means interaction between 2 columns only, 3 for three columns and
#' so on... Default to 2. Defaults to 0.
#' @param nparallelism Number of models to build in parallel. Default to 4. Adjust according to your system. Defaults to 4.
#' @param type Refer to the SS type 1, 2, 3, or 4. We are currently only supporting 3 Defaults to 0.
#' @examples
#' \dontrun{
Expand Down Expand Up @@ -126,7 +125,6 @@ h2o.anovaglm <- function(x,
max_runtime_secs = 0,
save_transformed_framekeys = FALSE,
highest_interaction_term = 0,
nparallelism = 4,
type = 0)
{
# Validate required training_frame first and other frame args: should be a valid key or an H2OFrame object
Expand Down Expand Up @@ -213,8 +211,6 @@ h2o.anovaglm <- function(x,
parms$save_transformed_framekeys <- save_transformed_framekeys
if (!missing(highest_interaction_term))
parms$highest_interaction_term <- highest_interaction_term
if (!missing(nparallelism))
parms$nparallelism <- nparallelism
if (!missing(type))
parms$type <- type

Expand Down Expand Up @@ -256,7 +252,6 @@ h2o.anovaglm <- function(x,
max_runtime_secs = 0,
save_transformed_framekeys = FALSE,
highest_interaction_term = 0,
nparallelism = 4,
type = 0,
segment_columns = NULL,
segment_models_id = NULL,
Expand Down Expand Up @@ -348,8 +343,6 @@ h2o.anovaglm <- function(x,
parms$save_transformed_framekeys <- save_transformed_framekeys
if (!missing(highest_interaction_term))
parms$highest_interaction_term <- highest_interaction_term
if (!missing(nparallelism))
parms$nparallelism <- nparallelism
if (!missing(type))
parms$type <- type

Expand Down
15 changes: 0 additions & 15 deletions h2o-r/h2o-package/R/modelselection.R
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,6 @@
#' @param max_after_balance_size Maximum relative size of the training data after balancing class counts (can be less than 1.0). Requires
#' balance_classes. Defaults to 5.0.
#' @param max_runtime_secs Maximum allowed runtime in seconds for model training. Use 0 to disable. Defaults to 0.
#' @param custom_metric_func Reference to custom evaluation function, format: `language:keyName=funcName`
#' @param nparallelism number of models to build in parallel. Defaults to 0.0 which is adaptive to the system capability Defaults to
#' 0.
#' @param max_predictor_number Maximum number of predictors to be considered when building GLM models. Defaults to 1. Defaults to 1.
#' @param min_predictor_number For mode = 'backward' only. Minimum number of predictors to be considered when building GLM models starting
#' with all predictors to be included. Defaults to 1. Defaults to 1.
Expand Down Expand Up @@ -190,8 +187,6 @@ h2o.modelSelection <- function(x,
class_sampling_factors = NULL,
max_after_balance_size = 5.0,
max_runtime_secs = 0,
custom_metric_func = NULL,
nparallelism = 0,
max_predictor_number = 1,
min_predictor_number = 1,
mode = c("allsubsets", "maxr", "maxrsweep", "backward"),
Expand Down Expand Up @@ -315,10 +310,6 @@ h2o.modelSelection <- function(x,
parms$max_after_balance_size <- max_after_balance_size
if (!missing(max_runtime_secs))
parms$max_runtime_secs <- max_runtime_secs
if (!missing(custom_metric_func))
parms$custom_metric_func <- custom_metric_func
if (!missing(nparallelism))
parms$nparallelism <- nparallelism
if (!missing(max_predictor_number))
parms$max_predictor_number <- max_predictor_number
if (!missing(min_predictor_number))
Expand Down Expand Up @@ -387,8 +378,6 @@ h2o.modelSelection <- function(x,
class_sampling_factors = NULL,
max_after_balance_size = 5.0,
max_runtime_secs = 0,
custom_metric_func = NULL,
nparallelism = 0,
max_predictor_number = 1,
min_predictor_number = 1,
mode = c("allsubsets", "maxr", "maxrsweep", "backward"),
Expand Down Expand Up @@ -517,10 +506,6 @@ h2o.modelSelection <- function(x,
parms$max_after_balance_size <- max_after_balance_size
if (!missing(max_runtime_secs))
parms$max_runtime_secs <- max_runtime_secs
if (!missing(custom_metric_func))
parms$custom_metric_func <- custom_metric_func
if (!missing(nparallelism))
parms$nparallelism <- nparallelism
if (!missing(max_predictor_number))
parms$max_predictor_number <- max_predictor_number
if (!missing(min_predictor_number))
Expand Down