From ed1b01baeef13f5298dad94f471923b596bc0f72 Mon Sep 17 00:00:00 2001
From: rileyh
Date: Mon, 7 Oct 2024 18:03:04 +0000
Subject: [PATCH 1/2] [#150] Install setuptools as a dev dependency

This should hopefully fix the distutils import issue which causes the
tests to fail on Python 3.12.
---
 pyproject.toml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 2f024dd..e43cab0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,6 +35,10 @@ dev = [
     "pre-commit>=2.0",
     "twine>=4.0",
     "build>=0.6",
+    # setuptools is only necessary because Python 3.12 removed the distutils
+    # package. pyspark 3.5.X still imports this package, so we need it on
+    # Python 3.12 to run the tests and have hlink work at all.
+    "setuptools",
     # These are pinned so tightly because their version numbers appear in the docs.
     # So if you use a different version, it creates a huge diff in the docs.
     # TODO: auto-generate docs on push to GitHub instead of committing them to the

From 42b4f45e440fa7a9b64afe754ff2f6a81a13e44f Mon Sep 17 00:00:00 2001
From: rileyh
Date: Mon, 7 Oct 2024 18:25:42 +0000
Subject: [PATCH 2/2] [#150] Update the README with info about Python 3.12
 and pyspark

We can do our best to support Python 3.12, but it's a bit janky because
pyspark doesn't officially support it. Python 3.10 and 3.11 should be
pretty solid.
---
 README.md | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 2f285a9..c020e7a 100755
--- a/README.md
+++ b/README.md
@@ -17,11 +17,28 @@ hlink requires
 - Python 3.10, 3.11, or 3.12
 - Java 8 or greater for integration with PySpark
 
-You can install the newest version of the python package directly from PyPI with pip:
+You can install the newest version of the Python package directly from PyPI with pip:
 ```
 pip install hlink
 ```
 
+We do our best to make hlink compatible with Python 3.10-3.12. If you have a
+problem using hlink on one of these versions of Python, please open an issue
+through GitHub. Versions of Python older than 3.10 are not supported.
+
+Note that pyspark 3.5 does not yet officially support Python 3.12. If you
+encounter pyspark-related import errors while running hlink on Python 3.12, try:
+
+- Installing the setuptools package. The distutils package was deleted from the
+  standard library in Python 3.12, but some versions of pyspark still import
+  it. The setuptools package provides a hacky stand-in distutils library which
+  should fix some import errors in pyspark. We install setuptools in our
+  development and test dependencies so that our tests work on Python 3.12.
+
+- Downgrading Python to 3.10 or 3.11. Pyspark officially supports these
+  versions of Python, so you should have a better chance of getting pyspark
+  to work well on Python 3.10 or 3.11.
+
 ## Docs
 
 The documentation site can be found at [hlink.docs.ipums.org](https://hlink.docs.ipums.org).
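
As a rough sketch of the setuptools workaround described in the README change above (assuming a Python 3.12 environment where pyspark hits a distutils-related import error; these commands are an illustration, not part of the patch):

```
# Assumption: Python 3.12, where the standard library no longer ships distutils.
# Installing setuptools provides a stand-in distutils that pyspark can import.
pip install setuptools

# Sanity check: the import should now succeed and resolve to setuptools' copy.
python -c "import distutils; print(distutils.__file__)"
```

If the import error persists, downgrading to Python 3.10 or 3.11, which pyspark 3.5 officially supports, is the more dependable route.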