Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-6722 constraint glm #16035

Merged
merged 24 commits into from
Apr 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
cceac0d
GH-6722: add ability to have user find glm coefficient names without …
wendycwong Jul 17, 2023
847dcd5
Incorporate Veronika Maurever comments.
Apr 5, 2024
cc216b4
incorporate more Veronika comments.
Apr 6, 2024
bf6ef00
incorporating veronika comments
Apr 6, 2024
d8dd08a
add error messages of all coefficients for beta constraint breakers i…
Apr 8, 2024
60f9c70
add comments to my work and refer to the document on constrained GLM.
Apr 8, 2024
a12d3c3
Fix test failures caused by files not found.
Apr 9, 2024
19985bf
add NPE check to prevent error.
Apr 10, 2024
7c50d99
Fixed bug in code.
Apr 10, 2024
086f508
add npe check before accessing submodel content
Apr 11, 2024
942e1c5
Fix npe in test by instantiating class before referring to it.
Apr 12, 2024
afe4e54
Fix R cmd check error.
Apr 15, 2024
09a6bab
shorten tests for models.R
Apr 15, 2024
3e36190
change example in models.R to fix R cmd failure
Apr 15, 2024
a318bee
convert data frame to h2o frame before using it.
Apr 16, 2024
22e6595
add solver option
Apr 16, 2024
792aeba
fix R cmd check error. Wrong linear constraint type was specified.
Apr 16, 2024
51bcfbd
Fix test failure
Apr 17, 2024
a7c7190
fix R cmd check failure
Apr 17, 2024
0e33b53
remove coefficient comparison in tests.
Apr 18, 2024
bba5cbb
remove coeff comparison.
Apr 19, 2024
2784498
Shorten example code length
Apr 22, 2024
1b36e41
Revert ArrayUtils.maxValue(devHistoryTrain) to original implementatio…
Apr 22, 2024
24077a5
shortened examples in models.R to fix R cmd check error.
Apr 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
import org.junit.Test;
import org.junit.runner.RunWith;
import water.*;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.NewChunk;
import water.fvec.Vec;
import water.runner.CloudSize;
import water.runner.H2ORunner;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,13 @@ public static void copy2DArray(double[][] src_array, double[][] dest_array) {
src_array[colIdx].length);
}
}

// copy a square array
public static double[][] copy2DArray(double[][] src_array) {
double[][] dest_array = MemoryManager.malloc8d(src_array.length, src_array[0].length);
copy2DArray(src_array, dest_array);
return dest_array;
}

public static void copy2DArray(int[][] src_array, int[][] dest_array) {
int numRows = src_array.length;
Expand Down
4 changes: 4 additions & 0 deletions h2o-algos/src/main/java/hex/glm/CoefIndices.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
package hex.glm;

public interface CoefIndices {
}
505 changes: 488 additions & 17 deletions h2o-algos/src/main/java/hex/glm/ComputationState.java

Large diffs are not rendered by default.

859 changes: 859 additions & 0 deletions h2o-algos/src/main/java/hex/glm/ConstrainedGLMUtils.java

Large diffs are not rendered by default.

748 changes: 557 additions & 191 deletions h2o-algos/src/main/java/hex/glm/GLM.java

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions h2o-algos/src/main/java/hex/glm/GLMModel.java
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,7 @@ public static class GLMParameters extends Model.Parameters {
public enum MissingValuesHandling {
MeanImputation, PlugValues, Skip
}
public enum Constraints {EqualTo, LessThanEqualTo};
public String algoName() { return "GLM"; }
public String fullName() { return "Generalized Linear Modeling"; }
public String javaName() { return GLMModel.class.getName(); }
Expand Down Expand Up @@ -515,6 +516,8 @@ public enum MissingValuesHandling {
public StringPair[] _interaction_pairs=null;
public boolean _early_stopping = true;
public Key<Frame> _beta_constraints = null;
public Key<Frame> _linear_constraints = null;
public boolean _expose_constraints = false; // internal parameter for testing only.
public Key<Frame> _plug_values = null;
// internal parameter, handle with care. GLM will stop when there is more than this number of active predictors (after strong rule screening)
public int _max_active_predictors = -1;
Expand All @@ -534,6 +537,14 @@ public enum MissingValuesHandling {
public double _dispersion_learning_rate = 0.5;
public Influence _influence; // if set to dfbetas will calculate the difference of betas obtained from including and excluding a data row
public boolean _keepBetaDiffVar = false; // if true, will keep the frame generating the beta without from i and the variance estimation
boolean _testCSZeroGram = false; // internal parameter, to test zero gram dropped column is correctly implemented
public boolean _separate_linear_beta = false; // if true, will perform the beta and linear constraint separately
public boolean _init_optimal_glm = false; // only used when there is linear constraints
public double _constraint_eta0 = 0.1258925; // eta_k = constraint_eta0/pow(constraint_c0, constraint_alpha)
public double _constraint_tau = 10; // ck+1 = tau*ck
public double _constraint_alpha = 0.1; // eta_k = constraint_eta_0/pow(constraint_c0, constraint_alpha)
public double _constraint_beta = 0.9; // eta_k+1 = eta_k/pow(c_k, beta)
public double _constraint_c0 = 10; // set initial epsilon k as 1/c0

public void validate(GLM glm) {
if (_remove_collinear_columns) {
Expand Down Expand Up @@ -1552,6 +1563,9 @@ public static class GLMOutput extends Model.Output {
public int _selected_submodel_idx; // submodel index with best deviance
public int _best_submodel_idx; // submodel index with best deviance
public int _best_lambda_idx; // the same as best_submodel_idx, kept to ensure backward compatibility
public String[] _linear_constraint_states;
public boolean _all_constraints_satisfied;
public TwoDimTable _linear_constraints_table;
public Key<Frame> _regression_influence_diagnostics;
public Key<Frame> _betadiff_var; // for debugging only, no need to serialize
public double lambda_best(){return _submodels.length == 0 ? -1 : _submodels[_best_submodel_idx].lambda_value;}
Expand All @@ -1577,6 +1591,12 @@ public double lambda_selected(){
private double _dispersion;
private boolean _dispersionEstimated;
public int[] _activeColsPerClass;
public ConstrainedGLMUtils.LinearConstraints[] _equalityConstraintsLinear = null;
public ConstrainedGLMUtils.LinearConstraints[] _lessThanEqualToConstraintsLinear = null;
public ConstrainedGLMUtils.LinearConstraints[] _equalityConstraintsBeta = null;
public ConstrainedGLMUtils.LinearConstraints[] _lessThanEqualToConstraintsBeta = null;
public String[] _constraintCoefficientNames = null;
public double[][] _initConstraintMatrix = null;
public boolean hasPValues(){return _zvalues != null;}
public boolean hasVIF() { return _vif_predictor_names != null; }

Expand Down
13 changes: 11 additions & 2 deletions h2o-algos/src/main/java/hex/glm/GLMTask.java
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,7 @@ public final void reduce(GLMGradientTask gmgt){
_likelihood += gmgt._likelihood;
}
@Override public final void postGlobal(){
ArrayUtils.mult(_gradient,_reg);
ArrayUtils.mult(_gradient,_reg); // reg is obj_reg
for(int j = 0; j < _beta.length - 1; ++j)
_gradient[j] += _currentLambda * _beta[j]; // add L2 constraint for gradient
if ((_penalty_mat != null) && (_gamBetaIndices != null)) // update contribution from gam smoothness constraint
Expand Down Expand Up @@ -1522,6 +1522,7 @@ public static class GLMIterationTask extends FrameTask2<GLMIterationTask> {
double wsum, sumOfRowWeights;
double _sumsqe;
int _c = -1;
boolean _hasConstraints = false;

public double[] getXY() {
return _xy;
Expand All @@ -1546,6 +1547,15 @@ public GLMIterationTask(Key jobKey, DataInfo dinfo, GLMWeightsFun glmw, double
_c = c;
}

public GLMIterationTask(Key jobKey, DataInfo dinfo, GLMWeightsFun glmw, double [] beta, int c, boolean hasConst) {
super(null,dinfo,jobKey);
_beta = beta;
_ymu = null;
_glmf = glmw;
_c = c;
_hasConstraints = hasConst;
}

@Override public boolean handlesSparseData(){return true;}

transient private double _sparseOffset;
Expand Down Expand Up @@ -1659,7 +1669,6 @@ public boolean hasNaNsOrInf() {
return ArrayUtils.hasNaNsOrInfs(_xy) || _gram.hasNaNsOrInfs();
}
}


/* public static class GLMCoordinateDescentTask extends FrameTask2<GLMCoordinateDescentTask> {
final GLMParameters _params;
Expand Down
8 changes: 8 additions & 0 deletions h2o-algos/src/main/java/hex/glm/GLMUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -365,4 +365,12 @@ public static Frame buildRIDFrame(GLMModel.GLMParameters parms, Frame train, Fra
train.add(parms._response_column, responseVec);
return train;
}

public static boolean notZeroLambdas(double[] lambdas) {
if (lambdas == null) {
return false;
} else {
return ((int) Arrays.stream(lambdas).filter(x -> x != 0.0).boxed().count()) > 0;
}
}
}
16 changes: 16 additions & 0 deletions h2o-algos/src/main/java/hex/gram/Gram.java
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,22 @@ public void addGAMPenalty(Integer[] activeColumns, double[][][] ds, int[][] gamI
}
}
}

public void addGAMPenalty(double[][][] ds, int[][] gamIndices, double[][] xmatrix) {
int numGamCols = gamIndices.length;
int numKnots;
int betaIndex, betaIndexJ;
for (int gamInd=0; gamInd<numGamCols; gamInd++) {
numKnots = gamIndices[gamInd].length;
for (int betaInd=0; betaInd<numKnots; betaInd++) {
betaIndex = gamIndices[gamInd][betaInd];
for (int betaIndj=0; betaIndj<= betaInd; betaIndj++) {
betaIndexJ = gamIndices[gamInd][betaIndj];
xmatrix[betaIndex][betaIndexJ] += 2*ds[gamInd][betaInd][betaIndj];
}
}
}
}

public double get(int i, int j) {
if(j > i) {
Expand Down
141 changes: 140 additions & 1 deletion h2o-algos/src/main/java/hex/optimization/OptimizationUtils.java
Original file line number Diff line number Diff line change
@@ -1,17 +1,23 @@
package hex.optimization;


import hex.glm.ComputationState;
import hex.glm.ConstrainedGLMUtils;
import hex.glm.GLM;
import water.Iced;
import water.util.ArrayUtils;
import water.util.Log;

import java.util.Arrays;
import java.util.List;

import static hex.glm.ComputationState.EPS_CS_SQUARE;
import static hex.glm.ConstrainedGLMUtils.*;

/**
* Created by tomasnykodym on 9/29/15.
*/
public class OptimizationUtils {

public static class GradientInfo extends Iced {
public double _objVal;
public double [] _gradient;
Expand Down Expand Up @@ -456,4 +462,137 @@ public GradientInfo ginfo() {
}
}

/***
* This class implements the exact line search described in the doc, Algorithm 11.5
*/
public static final class ExactLineSearch {
public final double _betaLS1 = 1e-4;
public final double _betaLS2 = 0.99;
public final double _lambdaLS = 2;
public double _alphal;
public double _alphar;
public double _alphai;
public double[] _direction; // beta_k+1 - beta_k
public int _maxIteration = 50; // 40 too low for tight constraints
public double[] _originalBeta;
public double[] _newBeta;
public GLM.GLMGradientInfo _ginfoOriginal;
public double _currGradDirIP; // current gradient and direction inner product
public String[] _coeffNames;

public ExactLineSearch(double[] betaCnd, ComputationState state, List<String> coeffNames) {
reset(betaCnd, state, coeffNames);
}

public void reset(double[] betaCnd, ComputationState state, List<String> coeffNames) {
_direction = new double[betaCnd.length];
ArrayUtils.subtract(betaCnd, state.beta(), _direction);
_ginfoOriginal = state.ginfo();
_originalBeta = state.beta();
_alphai = 1;
_alphal = 0;
_alphar = Double.POSITIVE_INFINITY;
_coeffNames = coeffNames.toArray(new String[0]);
_currGradDirIP = ArrayUtils.innerProduct(_ginfoOriginal._gradient, _direction);
}

/***
* Evaluate and make sure that step size alphi is not too big so that objective function is still decreasing. Refer
* to the doc, Definition 11.6
*/
public boolean evaluateFirstWolfe(GLM.GLMGradientInfo ginfoNew) {
double newObj = ginfoNew._objVal;
double rhs = _ginfoOriginal._objVal+_alphai*_betaLS1* _currGradDirIP;
return (newObj <= rhs);
}

/***
* Evaluate and make sure that step size alphi is not too small so that good progress is made in reducing the
* loss function. Refer to the doc, Definition 11.8
*/
public boolean evaluateSecondWolfe(GLM.GLMGradientInfo ginfo) {
double lhs = ArrayUtils.innerProduct(ginfo._gradient, _direction);
return lhs >= _betaLS2* _currGradDirIP;
}

public boolean setAlphai(boolean firstWolfe, boolean secondWolfe) {
if (!firstWolfe && secondWolfe) { // step is too long
_alphar = _alphai;
_alphai = 0.5*(_alphal+_alphar);
return true;
} else if (firstWolfe && !secondWolfe) { // step is too short
_alphal = _alphai;
if (_alphar < Double.POSITIVE_INFINITY)
_alphai = 0.5*(_alphal+_alphar);
else
_alphai = _lambdaLS * _alphai;
return true;
}
return false;
}

public void setBetaConstraintsDeriv(double[] lambdaEqual, double[] lambdaLessThan, ComputationState state,
ConstrainedGLMUtils.LinearConstraints[] equalityConstraints,
ConstrainedGLMUtils.LinearConstraints[] lessThanEqualToConstraints,
GLM.GLMGradientSolver gradientSolver, double[] betaCnd) {
_newBeta = betaCnd;
updateConstraintValues(betaCnd, Arrays.asList(_coeffNames), equalityConstraints, lessThanEqualToConstraints);
calculateConstraintSquare(state, equalityConstraints, lessThanEqualToConstraints);
// update gradient from constraints transpose(lambda)*h(beta)(value, not changed, active status may change)
// and gram contribution from ck/2*transpose(h(beta))*h(beta)), value not changed but active status may change
state.updateConstraintInfo(equalityConstraints, lessThanEqualToConstraints);
// calculate new gradient and objective function;
_ginfoOriginal = calGradient(betaCnd, state, gradientSolver, lambdaEqual, lambdaLessThan,
equalityConstraints, lessThanEqualToConstraints);
}

/***
* Implements the Line Search algorithm in the doc, Algorithm 11.5.
*/
public boolean findAlpha(double[] lambdaEqual, double[] lambdaLessThan, ComputationState state,
ConstrainedGLMUtils.LinearConstraints[] equalityConstraints,
ConstrainedGLMUtils.LinearConstraints[] lessThanEqualToConstraints,
GLM.GLMGradientSolver gradientSolver) {
if (_currGradDirIP > 0) {
return false;
}
GLM.GLMGradientInfo newGrad;
double[] newCoef;
int betaLen = _originalBeta.length;
double[] tempDirection = new double[betaLen];
boolean firstWolfe;
boolean secondWolfe;
boolean alphaiChange;
for (int index=0; index<_maxIteration; index++) {
ArrayUtils.mult(_direction, tempDirection, _alphai); // tempCoef=alpha_i*direction
newCoef = ArrayUtils.add(tempDirection, _originalBeta); // newCoef = coef + alpha_i*direction
// calculate constraint values with new coefficients, constraints magnitude square
updateConstraintValues(newCoef, Arrays.asList(_coeffNames), equalityConstraints, lessThanEqualToConstraints);
calculateConstraintSquare(state, equalityConstraints, lessThanEqualToConstraints);
// update gradient from constraints transpose(lambda)*h(beta)(value, not changed, active status may change)
// and gram contribution from ck/2*transpose(h(beta))*h(beta)), value not changed but active status may change
state.updateConstraintInfo(equalityConstraints, lessThanEqualToConstraints);
// calculate new gradient and objective function for new coefficients newCoef
newGrad = calGradient(newCoef, state, gradientSolver, lambdaEqual, lambdaLessThan,
equalityConstraints, lessThanEqualToConstraints);
// evaluate if first Wolfe condition is satisfied;
firstWolfe = evaluateFirstWolfe(newGrad);
// evaluate if second Wolfe condition is satisfied;
secondWolfe = evaluateSecondWolfe(newGrad);
// return if both conditions are satisfied;
if (firstWolfe && secondWolfe) {
_newBeta = newCoef;
_ginfoOriginal = newGrad;
return true;
}

// set alphai if first Wolfe condition is not satisfied, set alpha i if second Wolfe condition is not satisfied;
alphaiChange = setAlphai(firstWolfe, secondWolfe);
if (!alphaiChange || _alphar < EPS_CS_SQUARE) { // if alphai, alphar value are not changed and alphar is too small, quit
return false;
}
}
return false;
}
}
}
27 changes: 22 additions & 5 deletions h2o-algos/src/main/java/hex/schemas/GLMModelV3.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,24 @@ public static final class GLMModelOutputV3 extends ModelOutputSchemaV3<GLMOutput

@API(help = "Predictor names where variable inflation factors are calculated.")
String[] vif_predictor_names;

@API(help = "GLM model coefficients names.")
String[] coefficient_names;

@API(help = "predictor variable inflation factors.")
double[] variable_inflation_factors;

@API(help = "Beta (if exists) and linear constraints states")
String[] linear_constraint_states;

@API(help = "Table of beta (if exists) and linear constraints values and status")
TwoDimTableV3 linear_constraints_table;

@API(help="Contains the original dataset and the dfbetas calculated for each predictor.")
KeyV3.FrameKeyV3 regression_influence_diagnostics;

@API(help="True if all constraints conditions are satisfied. Otherwise, false.")
boolean all_constraints_satisfied;

private GLMModelOutputV3 fillMultinomial(GLMOutput impl) {
if(impl.get_global_beta_multinomial() == null)
Expand Down Expand Up @@ -201,6 +213,9 @@ public GLMModelOutputV3 fillFromImpl(GLMModel.GLMOutput impl) {
alpha_best = impl.alpha_best();
best_submodel_index = impl.bestSubmodelIndex();
dispersion = impl.dispersion();
coefficient_names = impl.coefficientNames().clone();
if (impl._linear_constraint_states != null) // pass constraint conditions
linear_constraint_states = impl._linear_constraint_states.clone();
variable_inflation_factors = impl.getVariableInflationFactors();
vif_predictor_names = impl.hasVIF() ? impl.getVIFPredictorNames() : null;
List<String> validVIFNames = impl.hasVIF() ? Stream.of(vif_predictor_names).collect(Collectors.toList()) : null;
Expand All @@ -215,11 +230,13 @@ public GLMModelOutputV3 fillFromImpl(GLMModel.GLMOutput impl) {
random_coefficients_table.fillFromImpl(buildRandomCoefficients2DTable(impl.ubeta(), impl.randomcoefficientNames()));
}
double [] beta = impl.beta();
final double [] magnitudes = new double[beta.length];
int len = magnitudes.length - 1;
int[] indices = new int[len];
for (int i = 0; i < indices.length; ++i)
indices[i] = i;
final double [] magnitudes = beta==null?null:new double[beta.length];
int len = beta==null?0:magnitudes.length - 1;
int[] indices = beta==null?null:new int[len];
if (beta != null) {
for (int i = 0; i < indices.length; ++i)
indices[i] = i;
}

if(beta == null) beta = MemoryManager.malloc8d(names.length);
String [] colTypes = new String[]{"double"};
Expand Down
Loading
Loading