-
Notifications
You must be signed in to change notification settings - Fork 16
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactored yaml pipeline_space #55
Conversation
…at the parameter gets deduced as float
c16b6fe
to
14a9189
Compare
# Check if 'is_fidelity' is a boolean | ||
if not isinstance(is_fidelity, bool): | ||
raise TypeError( | ||
f"Expected 'is_fidelity' to be a boolean, but got type: " | ||
f"{type(is_fidelity).__name__}" | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nothing to change, my general opinion is that there's types for a reason and you can't check every input that it's of the correct type. If conflicted on whether to check it or not, I usually favor on not.
However something like if not 0 <= value <= 1
should be checked as it can't be conveyed in a type.
|
||
# Validate 'log' and 'is_fidelity' types to prevent configuration errors | ||
# from the YAML input | ||
for param, value in {"log": log, "is_fidelity": is_fidelity}.items(): |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can also do [("log", log), ("is_fidelity", is_fidelity)]
, just means you don't need to have the extra .items()
at the end :)
@@ -177,7 +183,7 @@ def pipeline_space_from_yaml( | |||
"For categorical parameter: cat, categorical\n" | |||
"For constant parameter: const, constant\n" | |||
) | |||
except (KeyError, TypeError, ValueError) as e: | |||
except (KeyError, TypeError, ValueError, FileNotFoundError) as e: | |||
raise SearchSpaceFromYamlFileError(e) from e | |||
return pipeline_space | |||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This whole block can really just be a series of try: excepts
rather than a nest try: except
. No need to change now, just for your info
def convert_scientific_notation(value: str | int | float, show_usage_flag=False) \ | ||
-> float | (float, bool): | ||
-> float | (float, bool): |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
First, use tuple[float, bool]
, doing (float, bool)
isn't valid syntax.
Also try to avoid \
in code. Normally formatters and linters will fix this up in the following way which I believe is more readable for longer function signatures.
def convert_scientific_notation(
value: str | int | float,
show_usage_flag: bool = False
) -> float | tuple[float, bool]:
@pytest.mark.neps_api | ||
def test_correct_yaml_files(): | ||
def test_correct_yaml_file(path): | ||
"""Test the function with a correctly formatted YAML file.""" | ||
pipeline_space = pipeline_space_from_yaml(path) | ||
assert isinstance(pipeline_space, dict) | ||
assert isinstance(pipeline_space["param_float1"], FloatParameter) | ||
assert pipeline_space["param_float1"].lower == 0.00001 | ||
assert pipeline_space["param_float1"].upper == 0.1 | ||
assert pipeline_space["param_float1"].log is True | ||
assert pipeline_space["param_float1"].is_fidelity is False | ||
assert pipeline_space["param_float1"].default is None | ||
assert pipeline_space["param_float1"].default_confidence_score == 0.5 | ||
assert isinstance(pipeline_space["param_int1"], IntegerParameter) | ||
assert pipeline_space["param_int1"].lower == -3 | ||
assert pipeline_space["param_int1"].upper == 30 | ||
assert pipeline_space["param_int1"].log is False | ||
assert pipeline_space["param_int1"].is_fidelity is True | ||
assert pipeline_space["param_int1"].default is None | ||
assert pipeline_space["param_int1"].default_confidence_score == 0.5 | ||
assert isinstance(pipeline_space["param_int2"], IntegerParameter) | ||
assert pipeline_space["param_int2"].lower == 100 | ||
assert pipeline_space["param_int2"].upper == 30000 | ||
assert pipeline_space["param_int2"].log is True | ||
assert pipeline_space["param_int2"].is_fidelity is False | ||
assert pipeline_space["param_int2"].default is None | ||
assert pipeline_space["param_int2"].default_confidence_score == 0.5 | ||
assert isinstance(pipeline_space["param_float2"], FloatParameter) | ||
assert pipeline_space["param_float2"].lower == 3.3e-5 | ||
assert pipeline_space["param_float2"].upper == 0.15 | ||
assert pipeline_space["param_float2"].log is False | ||
assert pipeline_space["param_float2"].is_fidelity is False | ||
assert pipeline_space["param_float2"].default is None | ||
assert pipeline_space["param_float2"].default_confidence_score == 0.5 | ||
assert isinstance(pipeline_space["param_cat"], CategoricalParameter) | ||
assert pipeline_space["param_cat"].choices == [2, "sgd", 10e-3] | ||
assert pipeline_space["param_cat"].is_fidelity is False | ||
assert pipeline_space["param_cat"].default is None | ||
assert pipeline_space["param_cat"].default_confidence_score == 2 | ||
assert isinstance(pipeline_space["param_const1"], ConstantParameter) | ||
assert pipeline_space["param_const1"].value == 0.5 | ||
assert pipeline_space["param_const1"].is_fidelity is False | ||
assert isinstance(pipeline_space["param_const2"], ConstantParameter) | ||
assert pipeline_space["param_const2"].value == 1e3 | ||
assert pipeline_space["param_const2"].is_fidelity is True | ||
|
||
test_correct_yaml_file("tests/test_yaml_search_space/correct_config.yaml") | ||
test_correct_yaml_file( | ||
"tests/test_yaml_search_space/correct_config_including_types" ".yaml" | ||
) | ||
float1 = FloatParameter(0.00001, 0.1, True, False) | ||
assert float1.__eq__(pipeline_space["param_float1"]) is True | ||
int1 = IntegerParameter(-3, 30, False, True) | ||
assert int1.__eq__(pipeline_space["param_int1"]) is True | ||
int2 = IntegerParameter(100, 30000, True, False) | ||
assert int2.__eq__(pipeline_space["param_int2"]) is True | ||
float2 = FloatParameter(3.3e-5, 0.15, False, False) | ||
assert float2.__eq__(pipeline_space["param_float2"]) is True | ||
cat1 = CategoricalParameter([2, "sgd", 10e-3], False) | ||
assert cat1.__eq__(pipeline_space["param_cat"]) is True | ||
const1 = ConstantParameter(0.5, False) | ||
assert const1.__eq__(pipeline_space["param_const1"]) is True | ||
const2 = ConstantParameter(1e3, True) | ||
assert const2.__eq__(pipeline_space["param_const2"]) is True | ||
|
||
test_correct_yaml_file(BASE_PATH + "correct_config.yaml") | ||
test_correct_yaml_file(BASE_PATH + "correct_config_including_types.yaml") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Three things on code quality improvements:
assert something() is True
is equivalent toassert something()
a.__eq__(b)
is equivalent toa == b
- You can use
pytest.mark.parametrize(...)
to essentially run this test twice, each with a different yaml file.
I would personally write this in the following way:
@pytest.mark.neps_api
@pytest.mark.parametrize(
"path",
[
BASE_PATH + "correct_config.yaml",
BASE_PATH + "correct_config_including_types.yaml"
]
)
def test_correct_yaml_files(path: str):
"""Test the function with a correctly formatted YAML file."""
pipeline_space = pipeline_space_from_yaml(path)
assert isinstance(pipeline_space, dict)
float1 = FloatParameter(0.00001, 0.1, True, False)
int1 = IntegerParameter(-3, 30, False, True)
int2 = IntegerParameter(100, 30000, True, False)
cat1 = CategoricalParameter([2, "sgd", 10e-3], False)
float2 = FloatParameter(3.3e-5, 0.15, False, False)
const1 = ConstantParameter(0.5, False)
const2 = ConstantParameter(1e3, True)
assert pipeline_space["param_float1"] == float1
assert pipeline_space["param_int1"] == int1
assert pipeline_space["param_int2"] == int2
assert pipeline_space["param_float2"]) == float2
assert pipeline_space["param_cat"] == cat1
assert pipeline_space["param_const1"] == const1
assert pipeline_space["param_const2"] == const2
BASE_PATH + "correct_config_including_priors.yml" | ||
) | ||
assert isinstance(pipeline_space, dict) | ||
assert isinstance(pipeline_space["learning_rate"], FloatParameter) | ||
assert pipeline_space["learning_rate"].lower == 0.00001 | ||
assert pipeline_space["learning_rate"].upper == 0.1 | ||
assert pipeline_space["learning_rate"].log is True | ||
assert pipeline_space["learning_rate"].is_fidelity is False | ||
assert pipeline_space["learning_rate"].default == 3.3e-2 | ||
assert pipeline_space["learning_rate"].default_confidence_score == 0.125 | ||
assert isinstance(pipeline_space["num_epochs"], IntegerParameter) | ||
assert pipeline_space["num_epochs"].lower == 3 | ||
assert pipeline_space["num_epochs"].upper == 30 | ||
assert pipeline_space["num_epochs"].log is False | ||
assert pipeline_space["num_epochs"].is_fidelity is True | ||
assert pipeline_space["num_epochs"].default == 10 | ||
assert pipeline_space["num_epochs"].default_confidence_score == 0.5 | ||
assert isinstance(pipeline_space["optimizer"], CategoricalParameter) | ||
assert pipeline_space["optimizer"].choices == ["adam", 90e-3, "rmsprop"] | ||
assert pipeline_space["optimizer"].is_fidelity is False | ||
assert pipeline_space["optimizer"].default == 90e-3 | ||
assert pipeline_space["optimizer"].default_confidence_score == 4 | ||
assert isinstance(pipeline_space["dropout_rate"], ConstantParameter) | ||
assert pipeline_space["dropout_rate"].value == 1e3 | ||
assert pipeline_space["dropout_rate"].default == 1e3 | ||
float1 = FloatParameter(0.00001, 0.1, True, False, 3.3e-2, "high") | ||
assert float1.__eq__(pipeline_space["learning_rate"]) is True | ||
int1 = IntegerParameter(3, 30, False, True, 10) | ||
assert int1.__eq__(pipeline_space["num_epochs"]) is True | ||
cat1 = CategoricalParameter(["adam", 90e-3, "rmsprop"], False, 90e-3, "medium") | ||
assert cat1.__eq__(pipeline_space["optimizer"]) is True | ||
const1 = ConstantParameter(1e3, True) | ||
assert const1.__eq__(pipeline_space["dropout_rate"]) is True |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Refer to the previous suggestion, however you don't need the parametrize here
with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: | ||
pipeline_space_from_yaml( | ||
Path("tests/test_yaml_search_space/incorrect_config.txt") | ||
) | ||
pipeline_space_from_yaml(Path(BASE_PATH + "incorrect_config.txt")) | ||
assert excinfo.value.exception_type == "ValueError" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I feel like this second assert excinfo.value.exception_type == "ValueError"
isn't needed. You've nicely given a specific error type of SearchSpaceFromYamlFileError
. Check that this error type is also a ValueError
is not what the test is about.
with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: | ||
pipeline_space_from_yaml(BASE_PATH + "not_allowed_key_config.yml") | ||
assert excinfo.value.exception_type == "KeyError" | ||
|
||
|
||
@pytest.mark.neps_api | ||
def test_yaml_file_default_parameter_not_in_range(): | ||
"""Test if the default value outside the specified range is | ||
correctly identified and handled.""" | ||
with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: | ||
pipeline_space_from_yaml(BASE_PATH + "default_not_in_range_config.yaml") | ||
assert excinfo.value.exception_type == "ValueError" | ||
|
||
|
||
@pytest.mark.neps_api | ||
def test_float_log_not_boolean(): | ||
"""Test if an exception is raised when the 'log' attribute is not a boolean.""" | ||
with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: | ||
pipeline_space_from_yaml(BASE_PATH + "not_boolean_type_log_config.yaml") | ||
assert excinfo.value.exception_type == "TypeError" | ||
|
||
|
||
@pytest.mark.neps_api | ||
def test_float_is_fidelity_not_boolean(): | ||
"""Test if an exception is raised when for FloatParameter the 'is_fidelity' | ||
attribute is not a boolean.""" | ||
with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: | ||
pipeline_space_from_yaml( | ||
"tests/test_yaml_search_space/not_allowed_key_config.yml" | ||
BASE_PATH + "not_boolean_type_is_fidelity_float_config.yaml" | ||
) | ||
assert excinfo.value.exception_type == "KeyError" | ||
assert excinfo.value.exception_type == "TypeError" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please see above comment about the additional assert
These proposals were implemented:
Added Boolean check in parameter classes float/categorical for log and is_fidelity to avoid interpret typing error wrong in yaml file + tests
Added logging info to inform the user that his e notation gets converted to float(parameter)
Added constraints for default value (have to be in range for float/int parameter) or in choices for categorical parameter + tests
Fixed Link in Docs