Skip to content

Commit

Permalink
Improve errors for invalid distribution metadata. (#2443)
Browse files Browse the repository at this point in the history
Fixes #2441
  • Loading branch information
jsirois authored Jun 26, 2024
1 parent 0b15791 commit cd46690
Show file tree
Hide file tree
Showing 6 changed files with 287 additions and 65 deletions.
10 changes: 9 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Release Notes

## 2.6.1

This release improves error messages when attempting to read invalid
metadata from distributions such that the problematic distribution is
always identified.

* Improve errors for invalid distribution metadata. (#2443)

## 2.6.0

This release adds support for [PEP-723](
Expand All @@ -9,7 +17,7 @@ requirements, running the script is as simple as
`pex --exe <script> -- <script args>` and building a PEX encapsulating
it as simple as `pex --exe <script> --output <PEX file>`.

* Add support for PEP-723 script metadata to `--exe`. (#2436)
* Add support for PEP-723 script metadata to `--exe`. (#2436)

## 2.5.0

Expand Down
137 changes: 85 additions & 52 deletions pex/dist_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from pex.third_party.packaging.markers import Marker
from pex.third_party.packaging.requirements import InvalidRequirement
from pex.third_party.packaging.requirements import Requirement as PackagingRequirement
from pex.third_party.packaging.specifiers import SpecifierSet
from pex.third_party.packaging.specifiers import InvalidSpecifier, SpecifierSet
from pex.typing import TYPE_CHECKING, cast

if TYPE_CHECKING:
Expand Down Expand Up @@ -71,6 +71,10 @@ class MetadataNotFoundError(MetadataError):
"""Indicates an expected metadata file could not be found for a given distribution."""


class InvalidMetadataError(MetadataError):
"""Indicates a metadata value that is invalid."""


def _strip_sdist_path(sdist_path):
# type: (Text) -> Optional[Text]
if not sdist_path.endswith((".tar.gz", ".tgz", ".tar.bz2", ".tbz2", ".tar.xz", ".txz", ".zip")):
Expand All @@ -97,6 +101,15 @@ class DistMetadataFile(object):
version = attr.ib() # type: Version
pkg_info = attr.ib(eq=False) # type: Message

def render_description(self):
# type: () -> str
return "{project_name} {version} metadata from {rel_path} at {location}".format(
project_name=self.project_name,
version=self.version,
rel_path=self.rel_path,
location=self.location,
)


@attr.s(frozen=True)
class MetadataFiles(object):
Expand Down Expand Up @@ -371,20 +384,6 @@ def load_metadata(
return None


_PKG_INFO_BY_DIST_LOCATION = {} # type: Dict[Text, Optional[Message]]


def _parse_pkg_info(location):
# type: (Text) -> Optional[Message]
if location not in _PKG_INFO_BY_DIST_LOCATION:
pkg_info = None # type: Optional[Message]
metadata_files = load_metadata(location)
if metadata_files:
pkg_info = metadata_files.metadata.pkg_info
_PKG_INFO_BY_DIST_LOCATION[location] = pkg_info
return _PKG_INFO_BY_DIST_LOCATION[location]


@attr.s(frozen=True)
class ProjectNameAndVersion(object):
@classmethod
Expand Down Expand Up @@ -487,7 +486,7 @@ def project_name_and_version(


def requires_python(location):
# type: (Union[Text, Distribution, Message, MetadataFiles]) -> Optional[SpecifierSet]
# type: (Union[Distribution, MetadataFiles, Text]) -> Optional[SpecifierSet]
"""Examines dist for `Python-Requires` metadata and returns version constraints if any.
See: https://www.python.org/dev/peps/pep-0345/#requires-python
Expand All @@ -498,33 +497,39 @@ def requires_python(location):
if isinstance(location, Distribution):
return location.metadata.requires_python

pkg_info = None # type: Optional[Message]
if isinstance(location, Message):
pkg_info = location
elif isinstance(location, MetadataFiles):
pkg_info = location.metadata.pkg_info
metadata_files = None # type: Optional[MetadataFiles]
if isinstance(location, MetadataFiles):
metadata_files = location
else:
# N.B.: This load can fail, but the source is the location of the metadata files, which
# contains useful information in the path name for identifying the problem project and
# version.
metadata_files = load_metadata(location)
if metadata_files:
pkg_info = metadata_files.metadata.pkg_info
if pkg_info is None:
if metadata_files is None:
return None

python_requirement = pkg_info.get("Requires-Python", None)
python_requirement = metadata_files.metadata.pkg_info.get("Requires-Python", None)
if python_requirement is None:
return None
return SpecifierSet(python_requirement)
try:
return SpecifierSet(python_requirement)
except InvalidSpecifier as e:
raise InvalidMetadataError(
"Invalid Requires-Python metadata found in {source} {value!r}: {err}".format(
source=metadata_files.metadata.render_description(), value=python_requirement, err=e
)
)


def _parse_requires_txt(content):
# type: (bytes) -> Iterator[Requirement]
# type: (bytes) -> Iterator[Union[Requirement, Tuple[int, Text, RequirementParseError]]]
# See:
# + High level: https://setuptools.pypa.io/en/latest/deprecated/python_eggs.html#requires-txt
# + Low level:
# + https://github.com/pypa/setuptools/blob/fbe0d7962822c2a1fdde8dd179f2f8b8c8bf8892/pkg_resources/__init__.py#L3256-L3279
# + https://github.com/pypa/setuptools/blob/fbe0d7962822c2a1fdde8dd179f2f8b8c8bf8892/pkg_resources/__init__.py#L2792-L2818
marker = ""
for line in content.decode("utf-8").splitlines():
for line_no, line in enumerate(content.decode("utf-8").splitlines(), start=1):
line = line.strip()
if not line:
continue
Expand All @@ -539,11 +544,15 @@ def _parse_requires_txt(content):
if markers:
marker = "; {markers}".format(markers=" and ".join(markers))
else:
yield Requirement.parse(line + marker)
req = line + marker
try:
yield Requirement.parse(req)
except RequirementParseError as e:
yield line_no, req, e


def requires_dists(location):
# type: (Union[Text, Distribution, Message, MetadataFiles]) -> Iterator[Requirement]
# type: (Union[Distribution, MetadataFiles, Text]) -> Iterator[Requirement]
"""Examines dist for and returns any declared requirements.
Looks for `Requires-Dist` metadata.
Expand All @@ -564,37 +573,61 @@ def requires_dists(location):
yield requirement
return

pkg_info = None # type: Optional[Message]
if isinstance(location, Message):
pkg_info = location
elif isinstance(location, MetadataFiles):
pkg_info = location.metadata.pkg_info
metadata_files = None # type: Optional[MetadataFiles]
if isinstance(location, MetadataFiles):
metadata_files = location
else:
# N.B.: This load can fail, but the source is the location of the metadata files, which
# contains useful information in the path name for identifying the problem project and
# version.
metadata_files = load_metadata(location)
if metadata_files:
pkg_info = metadata_files.metadata.pkg_info
if pkg_info is None:
if metadata_files is None:
return

requires_dists = pkg_info.get_all("Requires-Dist", ())
if (
not requires_dists
and isinstance(location, MetadataFiles)
and MetadataType.EGG_INFO is location.metadata.type
):
invalid_values = [] # type: List[str]
requires_dists = metadata_files.metadata.pkg_info.get_all("Requires-Dist", ())
if not requires_dists and MetadataType.EGG_INFO is metadata_files.metadata.type:
for metadata_file in "requires.txt", "depends.txt":
content = location.read(metadata_file)
content = metadata_files.read(metadata_file)
if content:
for requirement in _parse_requires_txt(content):
yield requirement
for requirement_or_error in _parse_requires_txt(content):
if isinstance(requirement_or_error, Requirement):
yield requirement_or_error
else:
line_no, req, err = requirement_or_error
invalid_values.append(
"{file}:{line_no} {req!r}: {err}".format(
file=(
metadata_files.metadata_file_rel_path(metadata_file)
or metadata_file
),
line_no=line_no,
req=req,
err=err,
)
)
else:
for requires_dist in requires_dists:
yield Requirement.parse(requires_dist)
try:
yield Requirement.parse(requires_dist)
except RequirementParseError as e:
invalid_values.append("{req!r}: {err}".format(req=requires_dist, err=e))
if invalid_values:
raise InvalidMetadataError(
"Found {count} invalid Requires-Dist metadata {values} in {source}:\n"
"{invalid_values}".format(
count=len(invalid_values),
values=pluralize(invalid_values, "value"),
source=metadata_files.metadata.render_description(),
invalid_values="\n".join(
"{index}. {invalid_value}".format(index=index, invalid_value=invalid_value)
for index, invalid_value in enumerate(invalid_values, start=1)
),
)
)

legacy_requires = pkg_info.get_all("Requires", []) # type: List[str]
legacy_requires = metadata_files.metadata.pkg_info.get_all("Requires", []) # type: List[str]
if legacy_requires:
name_and_version = project_name_and_version(location)
project_name = name_and_version.project_name if name_and_version else location
pex_warnings.warn(
dedent(
"""\
Expand All @@ -607,7 +640,7 @@ def requires_dists(location):
"""
).format(
dist=location,
project_name=project_name,
project_name=metadata_files.metadata.project_name,
count=len(legacy_requires),
field=pluralize(legacy_requires, "field"),
requires=os.linesep.join(
Expand Down
2 changes: 1 addition & 1 deletion pex/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2015 Pex project contributors.
# Licensed under the Apache License, Version 2.0 (see LICENSE).

__version__ = "2.6.0"
__version__ = "2.6.1"
44 changes: 34 additions & 10 deletions testing/dist_metadata.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# Copyright 2024 Pex project contributors.
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import glob
import os.path
from email.message import Message
from typing import Callable, Text, Tuple

from pex.common import safe_mkdtemp
from pex.dist_metadata import DistMetadata, DistMetadataFile, MetadataFiles, MetadataType
Expand All @@ -20,6 +22,7 @@ def create_dist_metadata(
requires_python=None, # type: Optional[str]
requires_dists=(), # type: Iterable[str]
location=None, # type: Optional[str]
metadata_type=MetadataType.DIST_INFO, # type: MetadataType.Value
):
# type: (...) -> DistMetadata

Expand All @@ -30,20 +33,41 @@ def create_dist_metadata(
pkg_info.add_header("Requires-Python", requires_python)
for requirement in requires_dists:
pkg_info.add_header("Requires-Dist", requirement)

resolved_location = location or safe_mkdtemp()
metadata_dir = "{project_name}-{version}.{suffix}".format(
project_name=project_name,
version=version,
suffix="dist-info" if metadata_type is MetadataType.DIST_INFO else "egg-info",
)
metadata_file_name = "METADATA" if metadata_type is MetadataType.DIST_INFO else "PKG-INFO"

additional_metadata_files = () # type: Tuple[Text, ...]
read_function = None # type: Optional[Callable[[Text], bytes]]
if os.path.isdir(resolved_location):
additional_metadata_files = tuple(
os.path.relpath(metadata_path, resolved_location)
for metadata_path in glob.glob(os.path.join(resolved_location, metadata_dir, "*"))
if os.path.basename(metadata_path) != metadata_file_name
)
if additional_metadata_files:

def read_function(rel_path):
# type: (Text) -> bytes
with open(os.path.join(resolved_location, rel_path), "rb") as fp:
return fp.read()

return DistMetadata.from_metadata_files(
MetadataFiles(
DistMetadataFile(
type=MetadataType.DIST_INFO,
location=location or safe_mkdtemp(),
rel_path=os.path.join(
"{project_name}-{version}.dist-info".format(
project_name=project_name, version=version
),
"METADATA",
),
metadata=DistMetadataFile(
type=metadata_type,
location=resolved_location,
rel_path=os.path.join(metadata_dir, metadata_file_name),
project_name=ProjectName(project_name),
version=Version(version),
pkg_info=pkg_info,
)
),
additional_metadata_files=additional_metadata_files,
read_function=read_function,
)
)
64 changes: 64 additions & 0 deletions tests/integration/test_issue_2441.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Copyright 2024 Pex project contributors.
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import absolute_import

import re

import pytest

from pex.pip.version import PipVersion
from pex.typing import TYPE_CHECKING
from testing import PY_VER
from testing.cli import run_pex3

if TYPE_CHECKING:
from typing import Any


EXPECTED_ERROR_HEADER_LEAD_IN = (
"Found 7 invalid Requires-Dist metadata values in mlflow 1.27 metadata from "
"mlflow-1.27.0.dist-info/METADATA at"
)

EXPECTED_WHEEL = "mlflow-1.27.0-py3-none-any.whl"

EXPECTED_ERROR_FOOTER = """\
1. "scikit-learn (>=1.0.*) ; extra == 'pipelines'": .* suffix can only be used with `==` or `!=` operators
scikit-learn (>=1.0.*) ; extra == 'pipelines'
~~~~~~^
2. "pyarrow (>=7.0.*) ; extra == 'pipelines'": .* suffix can only be used with `==` or `!=` operators
pyarrow (>=7.0.*) ; extra == 'pipelines'
~~~~~~^
3. "shap (>=0.40.*) ; extra == 'pipelines'": .* suffix can only be used with `==` or `!=` operators
shap (>=0.40.*) ; extra == 'pipelines'
~~~~~~~^
4. "pandas-profiling (>=3.1.*) ; extra == 'pipelines'": .* suffix can only be used with `==` or `!=` operators
pandas-profiling (>=3.1.*) ; extra == 'pipelines'
~~~~~~^
5. "ipython (>=7.0.*) ; extra == 'pipelines'": .* suffix can only be used with `==` or `!=` operators
ipython (>=7.0.*) ; extra == 'pipelines'
~~~~~~^
6. "markdown (>=3.3.*) ; extra == 'pipelines'": .* suffix can only be used with `==` or `!=` operators
markdown (>=3.3.*) ; extra == 'pipelines'
~~~~~~^
7. "Jinja2 (>=3.0.*) ; extra == 'pipelines'": .* suffix can only be used with `==` or `!=` operators
Jinja2 (>=3.0.*) ; extra == 'pipelines'
~~~~~~^
"""


@pytest.mark.skipif(
PY_VER < (3, 7) or PipVersion.DEFAULT.version >= PipVersion.v24_1.version,
reason="The mlflow 1.27.0 distribution requires Python>=3.7 and the test requires Pip<24.1.",
)
def test_invalid_metadata_error_messages_under_old_pip(tmpdir):
# type: (Any) -> None

run_pex3("lock", "create", "mlflow==1.27.0", "--intransitive").assert_failure(
expected_error_re=r"^{header_lead_in} .*/{expected_wheel}:\n{footer}$".format(
header_lead_in=re.escape(EXPECTED_ERROR_HEADER_LEAD_IN),
expected_wheel=re.escape(EXPECTED_WHEEL),
footer=re.escape(EXPECTED_ERROR_FOOTER),
)
)
Loading

0 comments on commit cd46690

Please sign in to comment.