Skip to content

Commit

Permalink
minor addition of utility function for Python client.
Browse files Browse the repository at this point in the history
  • Loading branch information
wendycwong committed Oct 14, 2024
1 parent c10d94f commit 60ecdae
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 8 deletions.
6 changes: 6 additions & 0 deletions h2o-bindings/bin/custom/python/gen_hglm.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ def update_param(name, param):
return None

def class_extensions():
def level2_names(self):
    """
    Return the "group_column_names" entry stored in the "output" section of the model JSON.

    Presumably these are the level 2 (group column) values -- confirm against the backend model output.
    """
    model_output = self._model_json["output"]
    return model_output["group_column_names"]

def coefs_random_names(self):
"""
Get the random effect coefficient names including the intercept if applicable.
Expand Down
6 changes: 6 additions & 0 deletions h2o-py/h2o/estimators/hglm.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,12 @@ def gen_syn_data(self, gen_syn_data):
self._parms["gen_syn_data"] = gen_syn_data


def level2_names(self):
    """
    Return the "group_column_names" entry stored in the "output" section of the model JSON.

    Presumably these are the level 2 (group column) values -- confirm against the backend model output.
    """
    model_output = self._model_json["output"]
    return model_output["group_column_names"]

def coefs_random_names(self):
"""
Get the random effect coefficient names including the intercept if applicable.
Expand Down
46 changes: 38 additions & 8 deletions h2o-py/tests/pyunit_utils/utils_for_glm_hglm_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,34 @@ def find_model_iterations(glm_model):
iteration_index = glm_model._model_json["output"]["model_summary"].col_header.index("number_of_iterations")
return cell_values[lengths-1][iteration_index]

def normalize_random_coefs(random_coefs, numerical_cols, training_frame, num_level2):
    """
    Given a random effect coefficients dict, this method will standardize/normalize the coefficients.

    For every level 2 index, the coefficients belonging to that index are extracted into their own dict,
    passed through normalize_coefs, and the results are collected back into one list per coefficient name.

    :param random_coefs: python dict with random column names and a list of random coefficients for each level 2 index
    :param numerical_cols: numerical columns of the frame
    :param training_frame: h2o frame used to build the model
    :param num_level2: number of level 2 indices to process; each list in random_coefs must hold at least this
        many entries
    :return: python dict with random columns names and a list of normalized/standardized random coefficients,
        ordered by level 2 index
    """
    normalized_coefs = dict()
    # extract random coefficients for each level 2 value
    for ind2 in range(num_level2):
        # extract dict for one level 2 value
        coefs_one_level2 = extractCoeffDic(random_coefs, ind2)
        normalized_one_coefs = normalize_coefs(coefs_one_level2, numerical_cols, training_frame)
        # Key the result by the names normalize_coefs returns rather than by the input names, so any
        # entry it adds on its own is kept as well.
        for cname, value in normalized_one_coefs.items():
            normalized_coefs.setdefault(cname, []).append(value)
    return normalized_coefs


def extractCoeffDic(random_coeffs, ind2):
    """
    Pick out the coefficient found at one index for every name in the given dict.

    :param random_coeffs: python dict mapping each coefficient name to an indexable collection of coefficients
    :param ind2: index to read from each collection
    :return: python dict mapping each coefficient name to the single coefficient stored at position ind2
    """
    return {cname: values[ind2] for cname, values in random_coeffs.items()}



def normalize_coefs(coefs, numerical_cols, training_frame):
"""
Given coefficients as a dict, the method will normalize/standardize the given coefficients and return them in another
Expand All @@ -161,10 +189,11 @@ def normalize_coefs(coefs, numerical_cols, training_frame):
normalized_coefs = coefs.copy()
# only numerical coefficients are changed.
for cname in numerical_cols:
cmean = training_frame[cname].mean()[0,0]
csigma = training_frame[cname].sd()[0]
normalized_coefs[cname] = coefs[cname] * csigma
intercept_adjust = intercept_adjust + normalized_coefs[cname]*cmean/csigma
if cname in all_coefs_names:
cmean = training_frame[cname].mean()[0,0]
csigma = training_frame[cname].sd()[0]
normalized_coefs[cname] = coefs[cname] * csigma
intercept_adjust = intercept_adjust + normalized_coefs[cname]*cmean/csigma
if "intercept" in all_coefs_names:
normalized_coefs["intercept"] = coefs["intercept"]+intercept_adjust
else:
Expand All @@ -176,10 +205,11 @@ def denormalize_coefs(coefs_normalized, numerical_cols, training_frame):
all_coefs_names = coefs_normalized.keys()
denormalize_coefs = coefs_normalized.copy()
for cname in numerical_cols:
cmean = training_frame[cname].mean()[0,0]
csigma = training_frame[cname].sd()[0]
denormalize_coefs[cname] = coefs_normalized[cname] / csigma
intercept_adjust = intercept_adjust - cmean * coefs_normalized[cname] / csigma
if cname in all_coefs_names:
cmean = training_frame[cname].mean()[0,0]
csigma = training_frame[cname].sd()[0]
denormalize_coefs[cname] = coefs_normalized[cname] / csigma
intercept_adjust = intercept_adjust - cmean * coefs_normalized[cname] / csigma

if "intercept" in all_coefs_names:
denormalize_coefs["intercept"] = denormalize_coefs["intercept"] + intercept_adjust
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ def test_scoring_history_model_summary():
coef_norm = hglm_model.coef_norm()
coef_names = hglm_model.coef_names()
coef_random = hglm_model.coefs_random()
coef_random_names = hglm_model.coefs_random_names()
coef_random_norm = hglm_model.coefs_random_norm()
coef_random_names_norm = hglm_model.coefs_random_names_norm()
t_mat = hglm_model.matrix_T()
residual_var = hglm_model.residual_variance()
mse = hglm_model.mse()
Expand All @@ -51,6 +54,7 @@ def test_scoring_history_model_summary():
pyunit_utils.assertCoefDictEqual(coef_norm, coef_norm_manually, 1e-6)
coef_manually = utils_for_glm_hglm_tests.denormalize_coefs(coef_norm, numerical_columns, train)
pyunit_utils.assertCoefDictEqual(coef, coef_manually, 1e-6)
# check random effect coefficients and normalized random effect coefficients are converted correctly.
print("Done")

if __name__ == "__main__":
Expand Down

0 comments on commit 60ecdae

Please sign in to comment.