Skip to content

Commit

Permalink
Add a test for the substring column mapping transform
Browse files Browse the repository at this point in the history
This confirms that the transform handles the case where the values list doesn't
have length 2 by raising an error. This prompted me to make issue #146, which I
think should really simplify this transform.
  • Loading branch information
riley-harper committed Aug 27, 2024
1 parent 4158841 commit e4c9941
Showing 1 changed file with 29 additions and 0 deletions.
29 changes: 29 additions & 0 deletions hlink/tests/core/transforms_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,35 @@ def test_apply_transform_remove_punctuation(spark: SparkSession, is_a: bool) ->
]


@pytest.mark.parametrize("values", [[1], [1, 2, 3]])
@pytest.mark.parametrize("is_a", [True, False])
def test_apply_transform_substring_error_when_not_exactly_2_values(
    values: list[int], is_a: bool
) -> None:
    """
    Check that apply_transform() rejects a substring transform whose "values"
    list does not contain exactly two entries.

    The two entries are the start position of the substring and its length,
    so a list of any other size (here: one or three items) must raise a
    ValueError.

    TODO: Issue #146 proposes replacing the "values" list with two separate
    attributes for the start and the length, for example:

        {
            "type": "substring",
            "start_index": 0,
            "length": 4,
        }

    Once that change is made, this test is no longer needed.
    """
    expected_message = "Length of substr transform should be 2"
    substring_transform = dict(type="substring", values=values)
    column = col("input")

    # The error must be raised regardless of which dataset (A or B) is used.
    with pytest.raises(ValueError, match=expected_message):
        apply_transform(column, substring_transform, is_a)


@pytest.mark.parametrize("is_a", [True, False])
def test_apply_transform_error_when_unrecognized_transform_type(is_a: bool) -> None:
column_select = col("test")
Expand Down

0 comments on commit e4c9941

Please sign in to comment.