Skip to content

Commit 5b558fd

Browse files
committed
rm poetry and mappings
1 parent 2638b4d commit 5b558fd

File tree

8 files changed

+167
-1421
lines changed

8 files changed

+167
-1421
lines changed

.github/workflows/ci.yml

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,14 @@ jobs:
2525
- name: Check-out repository
2626
uses: actions/checkout@v2
2727

28-
- name: Install poetry
29-
uses: snok/install-poetry@v1
30-
3128
- name: Install package
32-
run: poetry install
29+
run: pip install -r requirements.txt
3330

3431
- name: Test with pytest
35-
run: poetry run python -m pytest tests --doctest-modules --cov=cat2cat --cov-report=xml
32+
run: python -m pytest tests --doctest-modules --cov=cat2cat --cov-report=xml
3633

3734
- name: Test with mypy
38-
run: poetry run mypy src
35+
run: mypy src
3936

4037
- name: Upload coverage to Codecov
4138
uses: codecov/codecov-action@v3

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
# Changelog
22

3-
## v0.1.4.9006
3+
## v0.1.4.9007
44

55
- New `cat2cat_ml_run` function to check the performance of the ml models before `cat2cat` is run with the ml option. The ml models are now more transparent.
66
- Added the previously missing support for NaN and None in `get_mappings`.
77
- Fixed a bug where `cat2cat_ml.features` could only be a `list`, not a `Sequence`.
88
- Fixed assertion message and docs for the `freqs` argument in the `cat2cat_mappings`.
99
- Fixed some type annotations so that `mypy` passes cleanly.
10+
- Replaced poetry with setuptools.
1011

1112
## v0.1.4 (12/09/2022)
1213

LICENSE

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,13 @@
1-
MIT License
1+
Copyright 2022 Maciej Nasinski
22

3-
Copyright (c) 2022, Maciej Nasinski
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
46

5-
Permission is hereby granted, free of charge, to any person obtaining a copy
6-
of this software and associated documentation files (the "Software"), to deal
7-
in the Software without restriction, including without limitation the rights
8-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9-
copies of the Software, and to permit persons to whom the Software is
10-
furnished to do so, subject to the following conditions:
11-
12-
The above copyright notice and this permission notice shall be included in all
13-
copies or substantial portions of the Software.
14-
15-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21-
SOFTWARE.
7+
http://www.apache.org/licenses/LICENSE-2.0
228

9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License.

poetry.lock

Lines changed: 0 additions & 1351 deletions
This file was deleted.

pyproject.toml

Lines changed: 39 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
1-
[tool.poetry]
1+
[build-system]
2+
requires = ["setuptools"]
3+
build-backend = "setuptools.build_meta"
4+
5+
[project]
26
name = "cat2cat"
3-
version = "0.1.4.9006"
7+
authors = [
8+
{name = "Maciej Nasinski", email = "[email protected]"},
9+
]
410
description = "Unifying an inconsistently coded categorical variable in a panel/longitudinal dataset."
5-
authors = ["Maciej Nasinski"]
6-
license = "MIT"
711
readme = "README.md"
8-
homepage = "https://github.com/polkas/py-cat2cat"
9-
repository = "https://github.com/polkas/py-cat2cat"
10-
documentation = "https://py-cat2cat.readthedocs.io/en/latest/"
12+
version = "0.1.4.9007"
13+
requires-python = ">=3.8"
1114
keywords = ["panel", "categorical", "longitudinal", "inconsistent", "cat2cat"]
12-
include = ["CHANGELOG.md"]
15+
license = {text = "Apache-2.0"}
1316
classifiers = [
1417
"Development Status :: 3 - Alpha",
1518
"Programming Language :: Python",
@@ -20,29 +23,37 @@ classifiers = [
2023
"Programming Language :: Python :: 3.11",
2124
"Programming Language :: Python :: Implementation :: PyPy",
2225
]
26+
dependencies = [
27+
"numpy",
28+
"pandas",
29+
"scikit-learn",
30+
"importlib-resources"
31+
]
2332

24-
[build-system]
25-
requires = ["poetry_core>=1.0.0"]
26-
build-backend = "poetry.core.masonry.api"
33+
[project.optional-dependencies]
34+
test = ["pytest", "pytest-cov", "mypy"]
35+
docs = [
36+
"Sphinx",
37+
"myst-parser",
38+
"sphinx-autoapi",
39+
"sphinx-rtd-theme"
40+
]
41+
build = ["build"]
42+
benchmark = ["snakeviz"]
43+
styler = ["flake8", "black"]
44+
all = ["cat2cat[test,docs,build,benchmark,styler]"]
45+
46+
[project.urls]
47+
homepage = "https://github.com/polkas/py-cat2cat"
48+
documentation = "https://py-cat2cat.readthedocs.io/en/latest/"
49+
repository = "https://github.com/polkas/py-cat2cat"
50+
changelog = "https://raw.githubusercontent.com/polkas/py-cat2cat/main/CHANGELOG.md"
2751

28-
[tool.poetry.dependencies]
29-
python = ">=3.8,<3.12"
30-
numpy = "^1.23.1"
31-
pandas = "^1.4.3"
32-
scikit-learn = "^1.1.2"
33-
importlib-resources = "^5.9.0"
52+
[tool.setuptools.packages.find]
53+
where = ["src"]
3454

35-
[tool.poetry.group.dev.dependencies]
36-
pytest = "^7.1.2"
37-
flake8 = "^5.0.4"
38-
black = "^22.6.0"
39-
pytest-cov = "^3.0.0"
40-
Sphinx = "^6.2.1"
41-
sphinx-autoapi = "^3.0.0"
42-
sphinx-rtd-theme = "^1.3.0"
43-
myst-parser = "^2.0.0"
44-
snakeviz = "^2.1.1"
45-
mypy = "^0.982"
55+
[tool.setuptools.package-data]
56+
"cat2cat.data" = ["*"]
4657

4758
[tool.mypy]
4859
python_version = "3.8"

requirements.txt

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
alabaster==0.7.13
2+
anyascii==0.3.2
3+
astroid==3.0.2
4+
Babel==2.14.0
5+
black==23.12.1
6+
build==1.0.3
7+
cdcertifi==2023.11.17
8+
charset-normalizer==3.3.2
9+
click==8.1.7
10+
coverage==7.3.4
11+
docutils==0.20.1
12+
exceptiongroup==1.2.0
13+
flake8==6.1.0
14+
idna==3.6
15+
imagesize==1.4.1
16+
importlib-resources==6.1.1
17+
iniconfig==2.0.0
18+
Jinja2==3.1.2
19+
joblib==1.3.2
20+
markdown-it-py==3.0.0
21+
MarkupSafe==2.1.3
22+
mccabe==0.7.0
23+
mdit-py-plugins==0.4.0
24+
mdurl==0.1.2
25+
mypy==1.8.0
26+
mypy-extensions==1.0.0
27+
myst-parser==2.0.0
28+
numpy==1.26.2
29+
packaging==23.2
30+
pandas==2.1.4
31+
pathspec==0.12.1
32+
platformdirs==4.1.0
33+
pluggy==1.3.0
34+
pycodestyle==2.11.1
35+
pyflakes==3.1.0
36+
Pygments==2.17.2
37+
pyproject_hooks==1.0.0
38+
pytest==7.4.3
39+
pytest-cov==4.1.0
40+
python-dateutil==2.8.2
41+
pytz==2023.3.post1
42+
PyYAML==6.0.1
43+
requests==2.31.0
44+
scikit-learn==1.3.2
45+
scipy==1.11.4
46+
six==1.16.0
47+
snakeviz==2.2.0
48+
snowballstemmer==2.2.0
49+
Sphinx==7.2.6
50+
sphinx-autoapi==3.0.0
51+
sphinx-rtd-theme==2.0.0
52+
sphinxcontrib-applehelp==1.0.7
53+
sphinxcontrib-devhelp==1.0.5
54+
sphinxcontrib-htmlhelp==2.0.4
55+
sphinxcontrib-jquery==4.1
56+
sphinxcontrib-jsmath==1.0.1
57+
sphinxcontrib-qthelp==1.0.6
58+
sphinxcontrib-serializinghtml==1.1.9
59+
threadpoolctl==3.2.0
60+
tomli==2.0.1
61+
tornado==6.4
62+
typing_extensions==4.9.0
63+
tzdata==2023.3
64+
urllib3==2.1.0

src/cat2cat/mappings.py

Lines changed: 46 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@
33

44
from collections.abc import Iterable
55
from collections import OrderedDict
6-
from typing import Union, Optional, Any, List, Dict, Sequence
6+
from typing import Union, Optional, Any, List, Dict, Sequence, TypeVar
77

88
__all__ = ["get_mappings", "cat_apply_freq", "get_freqs"]
99

10+
Table = TypeVar("Table", DataFrame, ndarray)
1011

11-
def get_mappings(x: Union[DataFrame, ndarray]) -> Dict[str, Dict[Any, List[Any]]]:
12+
def get_mappings(x: Table) -> Dict[str, Dict[Any, List[Any]]]:
1213
"""Transforming a mapping table with mappings to two associative lists
1314
1415
Transforming a transition table with mappings to two associative lists
@@ -37,23 +38,56 @@ def get_mappings(x: Union[DataFrame, ndarray]) -> Dict[str, Dict[Any, List[Any]]
3738
>>> mappings["to_new"]
3839
{1111.0: [111101.0, 111102.0], 1123.0: [111405.0], 1212.0: [112006.0, 112008.0, 112090.0, nan], nan: [111405.0]}
3940
"""
40-
4141
assert (len(x.shape) == 2) and (
4242
x.shape[1] == 2
4343
), "x should have 2 dimensions and the second one is equal to 2 (columns)"
4444

4545
if isinstance(x, DataFrame):
46-
ff = x.iloc[:, 0].copy()
47-
which_ff_null = ff.isnull()
48-
ff = ff.values
49-
ss = x.iloc[:, 1].copy()
50-
which_ss_null = ss.isnull()
51-
ss = ss.values
46+
return get_mappings_df(x)
5247
elif isinstance(x, ndarray):
53-
ff = x[:, 0].copy()
54-
ss = x[:, 1].copy()
48+
return get_mappings_array(x)
5549
else:
56-
raise (TypeError)
50+
raise TypeError("get_mappings input has to be ndarray or DataFrame")
51+
52+
def get_mappings_array(x: ndarray) -> Dict[str, Dict[Any, List[Any]]]:
53+
ff = x[:, 0].copy()
54+
ss = x[:, 1].copy()
55+
56+
assert ff.dtype == ss.dtype
57+
col_type = ff.dtype
58+
59+
from_old = list(OrderedDict.fromkeys(ff))
60+
from_new = list(OrderedDict.fromkeys(ss))
61+
62+
to_old = dict()
63+
for e in from_new:
64+
if (col_type in [float, int]) and isnan(e):
65+
idx = isnan(ss)
66+
else:
67+
idx = ss == e
68+
69+
# sorted so results are stable
70+
to_old[e] = sorted(unique(ff[idx]))
71+
72+
to_new = dict()
73+
for e in from_old:
74+
if (col_type in [float, int]) and isnan(e):
75+
idx = isnan(ff)
76+
else:
77+
idx = ff == e
78+
79+
# sorted so results are stable
80+
to_new[e] = sorted(unique(ss[idx]))
81+
82+
return dict(to_old=to_old, to_new=to_new)
83+
84+
def get_mappings_df(x: DataFrame) -> Dict[str, Dict[Any, List[Any]]]:
85+
ff = x.iloc[:, 0].copy()
86+
which_ff_null = ff.isnull()
87+
ff = ff.values
88+
ss = x.iloc[:, 1].copy()
89+
which_ss_null = ss.isnull()
90+
ss = ss.values
5791

5892
assert ff.dtype == ss.dtype
5993
col_type = ff.dtype
@@ -87,7 +121,6 @@ def get_mappings(x: Union[DataFrame, ndarray]) -> Dict[str, Dict[Any, List[Any]]
87121

88122
return dict(to_old=to_old, to_new=to_new)
89123

90-
91124
def get_freqs(
92125
x: Sequence[Any], multiplier: Optional[Sequence[int]] = None
93126
) -> Dict[Any, int]:

tests/test_cat2cat.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def int_round(x: float) -> int:
5959
},
6060
}
6161
which_target_origin = {"backward": ("old", "new"), "forward": ("new", "old")}
62+
code_var_name = {"backward": "code", "forward": "code4"}
6263

6364

6465
@pytest.mark.parametrize("direction", ["backward", "forward"])
@@ -146,9 +147,8 @@ def test_cat2cat_custom_freqs(direction, cat_type):
146147
assert data_dict[cat_type]["old"].equals(o)
147148
assert data_dict[cat_type]["new"].equals(n)
148149

149-
150150
@pytest.mark.parametrize("cat_type", ["str", "int"])
151-
@pytest.mark.parametrize("direction", ["backward", "forward"])
151+
@pytest.mark.parametrize("direction", ["backward"])
152152
def test_cat2cat_ml(direction, cat_type):
153153
o = data_dict[cat_type]["old"].copy()
154154
n = data_dict[cat_type]["new"].copy()
@@ -160,7 +160,7 @@ def test_cat2cat_ml(direction, cat_type):
160160
)
161161
ml = cat2cat_ml(
162162
occup.loc[occup.year >= 2010, :].copy(),
163-
"code",
163+
code_var_name[direction],
164164
["salary", "age", "edu", "sex"],
165165
[DecisionTreeClassifier(), LinearDiscriminantAnalysis()],
166166
)

0 commit comments

Comments
 (0)