Skip to content

Commit

Permalink
Move test to check init values are set correctly to Python from Java.…
Browse files Browse the repository at this point in the history
… I was not able to find a good combination of initial betas/ubetas and t matrix to make it work.
  • Loading branch information
wendycwong committed Oct 28, 2024
1 parent c36c312 commit b708d3c
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 54 deletions.
55 changes: 1 addition & 54 deletions h2o-algos/src/test/java/hex/hglm/HGLMBasicTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -517,8 +517,7 @@ public double[] grabRow2Arrays(String[] enumPredNames, String[] numPredNames, bo
}

if (hasIntercept)
rowValues.add(1.0);

rowValues.add(1.0);
return rowValues.stream().mapToDouble(Double::doubleValue).toArray();
}

Expand Down Expand Up @@ -551,58 +550,6 @@ public void testMatVecFormation() {
Scope.exit();
}
}

/*
 * Set initial values for the fixed coefficients, random coefficients, T matrix, and the residual variance
 * (sigma/tau_e_var_init), build the model with zero iterations, and make sure the model reports exactly the
 * values that were supplied.
 */
@Test
public void testSetInitBetasTvar() {
  Scope.enter();
  try {
    Frame trainingFrame = parseAndTrackTestFile("smalldata/hglm_test/gaussian_0GC_123R_all5Numeric_p2noise_p08T_woIntercept_standardize.gz");
    DKV.put(trainingFrame);
    // initial fixed effect coefficients, passed to the model through _initial_fixed_effects
    double[] initBetas = new double[]{1.556019455245578, 0.004387668926465224, -0.012162334935702198,
            0.007090905470190553, 0.6642825159614225, -0.6565480160986089};
    // initial random effect coefficients: 3 numeric columns of 8 values each
    // NOTE(review): presumably one row per level of the group column C1 and one column per random column - confirm
    Frame ubetaFrame = new TestFrameBuilder()
            .withColNames("C1", "C2", "C3")
            .withVecTypes(T_NUM, T_NUM, T_NUM)
            .withDataForCol(0, new double[]{-0.939297854112798, -1.3831760587493842, -0.550911700979278,
                    -0.7520414750585163, -0.7695703224787936, -1.285064572001838, -1.2685471084776616, -1.1851850464065945})
            .withDataForCol(1, new double[]{0.6201893456700508, 0.23799145042381686, 0.249791875918212,
                    0.37804734995822303, 0.5427148638497548, 0.3811004571313795, 0.40211187674359433, 0.48968036339542964})
            .withDataForCol(2, new double[]{0.8393873836611673, 0.8284318083138673, 0.8027667198768048,
                    0.8479753981593126, 0.8338039588079964, 0.8536416345174341, 0.8304699206057965, 0.8415980227739538})
            .build();
    Scope.track(ubetaFrame);
    // initial T matrix: 3 x 3 and symmetric (entry [i][j] equals entry [j][i] in the data below)
    Frame tMat = new TestFrameBuilder()
            .withColNames("C1","C2","C3")
            .withVecTypes(T_NUM, T_NUM, T_NUM)
            .withDataForCol(0, new double[]{1.1156288418645166, -0.4164395852593521, -0.850487353031379})
            .withDataForCol(1, new double[]{-0.4164395852593521, 0.1863522738575454, 0.3453469357885868})
            .withDataForCol(2, new double[]{-0.850487353031379, 0.3453469357885868, 0.6974072622840342})
            .build();
    Scope.track(tMat);
    Scope.track(trainingFrame);
    // initial residual variance, passed to the model through _tau_e_var_init
    double sigmaEpsilon = 0.09847638;
    HGLMModel.HGLMParameters params = new HGLMModel.HGLMParameters();
    params._train = trainingFrame._key;
    params._response_column = "response";
    params._group_column = "C1";
    params._random_intercept = false;
    // three random columns, matching the three columns of ubetaFrame and the 3 x 3 tMat
    params._random_columns = new String[]{"C10", "C2", "C3"};
    params._initial_fixed_effects = initBetas;
    params._initial_t_matrix = tMat._key;
    params._initial_random_effects = ubetaFrame._key;
    params._tau_e_var_init = sigmaEpsilon;
    // zero iterations: the values checked below are expected to be the supplied initial values
    params._max_iterations = 0;
    HGLMModel model = new HGLM(params).trainModel().get();
    Scope.track_generic(model);
    // compare what the model holds against every initial value supplied above
    checkCorrectInitValue(model, initBetas, ubetaFrame, tMat, sigmaEpsilon);
  } finally {
    // always leave the scope that was entered at the top of the test
    Scope.exit();
  }
}

public void checkCorrectInitValue(HGLMModel model, double[] initBetas, Frame ubetaFrame, Frame tMat, double sigmaEpsilon) {
// check fixed coefficient initialization
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import sys
sys.path.insert(1,"../../../")
import h2o
from tests import pyunit_utils
from h2o.estimators.hglm import H2OHGLMEstimator as hglm
from tests.pyunit_utils import utils_for_glm_hglm_tests

# Despite its name (kept so existing references keep working), this test does not inspect the scoring
# history or the model summary.  It checks that user supplied initial values are carried into the model
# unchanged when no iterations are run (max_iterations = 0, random_intercept = False):
# 1. fixed effect coefficients (initial_fixed_effects)
# 2. random effect coefficients (initial_random_effects)
# 3. matrix T (initial_t_matrix)
def test_scoring_history_model_summary():
    train_frame = h2o.import_file(path=pyunit_utils.locate("smalldata/hglm_test/gaussian_0GC_123R_all5Numeric_p2noise_p08T_woIntercept_standardize.gz"))

    # initial fixed effect coefficients
    beta = [1.5606284972932365, -0.0002347762275008978, -0.007899880335654788, 0.0018421903682971376,
            0.6654323495890934, -0.6544609203736372]

    # initial random effect coefficients: one row per level-2 name (see the comparison loop below),
    # three values per row to go with the three random columns
    ubeta = [[-0.9319187693195115, 0.6070501821727673, 0.8394540491750797],
             [-1.3823145230494698, 0.21486874352840676, 0.8366860141888742],
             [-0.552534049777237, 0.24577758770128783, 0.8172622402154629],
             [-0.7632283839126288, 0.3662979940622124, 0.8382611342477616],
             [-0.7660574987463035, 0.5278044590884986, 0.8421686869476276],
             [-1.2704526364630178, 0.3882261064670864, 0.8626801006264753],
             [-1.2615857701992563, 0.39167873788423885, 0.8448421359246485],
             [-1.1863349889243804, 0.4802231651611951, 0.852783164270973]]
    ubeta_init = h2o.H2OFrame(ubeta)

    # initial T matrix: 3 x 3 and symmetric
    t_mat = [[1.1086713375915982, -0.40493787563311834, -0.8561132576680854],
             [-0.40493787563311834, 0.17812207973788066, 0.33964543424526844],
             [-0.8561132576680854, 0.33964543424526844, 0.709024192121366]]
    t_mat_init = h2o.H2OFrame(t_mat)

    # predictors are every column except the response and the group column
    y = "response"
    x = train_frame.names
    x.remove("response")
    x.remove("C1")
    random_columns = ["C2", "C3", "C4"]

    hglm_model = hglm(random_columns = random_columns, group_column = "C1", seed = 12345, max_iterations = 0,
                      random_intercept = False, initial_fixed_effects = beta, initial_random_effects = ubeta_init,
                      initial_t_matrix = t_mat_init)
    hglm_model.train(x=x, y=y, training_frame=train_frame)

    # With max_iterations = 0 the fixed effect coefficients, the random effect coefficients and matrix T
    # reported by the model should equal the initial values supplied above.

    # compare initial beta
    # NOTE(review): only the first four predictors are compared although beta has six entries - confirm
    # whether the remaining entries should be checked as well.
    beta_model = hglm_model.coef()
    for pos in range(4):
        pred_name = x[pos]
        expected = beta[pos]
        assert abs(expected-beta_model[pred_name]) < 1e-6, \
            "fixed coefficients for {0} from model: {1}, from initialization: {2} should be the same but is " \
            "not.".format(pred_name, beta_model[pred_name], expected)

    # compare initial random effect coefficients, one level-2 name at a time
    # NOTE(review): the return value of equal_two_arrays is not used here; presumably the helper asserts
    # internally on a mismatch - confirm against pyunit_utils.
    ubeta_model = hglm_model.coefs_random()
    level_2_names = hglm_model.level_2_names()
    for pos, level_name in enumerate(level_2_names):
        pyunit_utils.equal_two_arrays(ubeta[pos], ubeta_model[level_name])

    # compare initial matrix T row by row
    t_mat_model = hglm_model.matrix_T()
    n_rows = len(t_mat_model)
    for row in range(n_rows):
        pyunit_utils.equal_two_arrays(t_mat[row], t_mat_model[row])





# When executed directly as a script, hand the test function to pyunit_utils.standalone_test;
# otherwise (the file is loaded rather than executed as __main__) call the test function directly.
if __name__ == "__main__":
    pyunit_utils.standalone_test(test_scoring_history_model_summary)
else:
    test_scoring_history_model_summary()

0 comments on commit b708d3c

Please sign in to comment.