diff --git a/tests/test_formula.py b/tests/test_formula.py index 8ea19d76..4737e12b 100644 --- a/tests/test_formula.py +++ b/tests/test_formula.py @@ -457,6 +457,7 @@ def test_all_names_against_from_pandas(df, categorical_format): assert mat_from_formula.term_names == mat_from_pandas.term_names +@pytest.mark.skip(reason="We handle categorical names differently from formulaic") @pytest.mark.parametrize( "ensure_full_rank", [True, False], ids=["full_rank", "all_levels"] ) @@ -480,19 +481,29 @@ def test_all_names_against_from_pandas(df, categorical_format): pytest.param( "1 + C(cat_1, spans_intercept=False) * cat_2 * cat_3", id="custom_contrasts", + marks=pytest.mark.xfail( + reason="Non-uniform categorical formats are not yet supported" + ), ), ], ) def test_names_against_pandas(df, formula, ensure_full_rank): + if ensure_full_rank: + categorical_format = "{name}[T.{category}]" + else: + categorical_format = "{name}[{category}]" + num_in_scope = 2 # noqa + model_df = formulaic.model_matrix(formula, df, ensure_full_rank=ensure_full_rank) model_tabmat = tm.from_formula( formula, df, ensure_full_rank=ensure_full_rank, - categorical_format="{name}[T.{category}]", + categorical_format=categorical_format, context=0, ) + assert model_tabmat.model_spec.column_names == model_df.model_spec.column_names assert model_tabmat.model_spec.column_names == tuple(model_df.columns) assert model_tabmat.column_names == list(model_df.columns)