Skip to content

Commit e8243f7

Browse files
tgbugs, nicholascar, edmondchuc, pre-commit-ci[bot]
authored
notation3.py: don't normalize float representation (#3020)
* added n3 test to check for internal float normalization; made as a separate commit to illustrate the old broken behavior prior to the fix in the next commit
* notation3.py: don't normalize float representation; fix behavior of the n3 parser family to avoid normalizing the raw float string representation, which makes it impossible to roundtrip the exact original string representation of e.g. 1e10
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* style: add noqa to sfloat class
* chore: remove unused mypy type ignore
* test: refactor test_float_no_norm to use pytest parametrization
---------
Co-authored-by: Nicholas Car <[email protected]>
Co-authored-by: Edmond Chuc <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Edmond Chuc <[email protected]>
1 parent 8da4e29 commit e8243f7

File tree

2 files changed

+26
-5
lines changed

2 files changed

+26
-5
lines changed

rdflib/plugins/parsers/notation3.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,10 @@ def unicodeExpand(m: Match) -> str:
384384
langcode = re.compile(r"[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*")
385385

386386

387+
class sfloat(str): # noqa: N801
388+
"""don't normalize raw XSD.double string representation"""
389+
390+
387391
class SinkParser:
388392
def __init__(
389393
self,
@@ -1530,7 +1534,7 @@ def nodeOrLiteral(self, argstr: str, i: int, res: MutableSequence[Any]) -> int:
15301534
m = exponent_syntax.match(argstr, i)
15311535
if m:
15321536
j = m.end()
1533-
res.append(float(argstr[i:j]))
1537+
res.append(sfloat(argstr[i:j]))
15341538
return j
15351539

15361540
m = decimal_syntax.match(argstr, i)
@@ -1921,15 +1925,15 @@ def normalise(self, f: Formula | Graph | None, n: int) -> Literal: ...
19211925
def normalise(self, f: Formula | Graph | None, n: Decimal) -> Literal: ...
19221926

19231927
@overload
1924-
def normalise(self, f: Formula | Graph | None, n: float) -> Literal: ...
1928+
def normalise(self, f: Formula | Graph | None, n: sfloat) -> Literal: ...
19251929

19261930
@overload
19271931
def normalise(self, f: Formula | Graph | None, n: Node) -> Node: ...
19281932

19291933
def normalise(
19301934
self,
19311935
f: Formula | Graph | None,
1932-
n: Union[tuple[int, str], bool, int, Decimal, float, Node, _AnyT],
1936+
n: Union[tuple[int, str], bool, int, Decimal, sfloat, Node, _AnyT],
19331937
) -> Union[URIRef, Literal, BNode, Node, _AnyT]:
19341938
if isinstance(n, tuple):
19351939
return URIRef(str(n[1]))
@@ -1949,7 +1953,7 @@ def normalise(
19491953
s = Literal(value, datatype=DECIMAL_DATATYPE)
19501954
return s
19511955

1952-
if isinstance(n, float):
1956+
if isinstance(n, sfloat):
19531957
s = Literal(str(n), datatype=DOUBLE_DATATYPE)
19541958
return s
19551959

@@ -1965,7 +1969,7 @@ def normalise(
19651969
# f.universals[n] = f.newBlankNode()
19661970
# return f.universals[n]
19671971
# type error: Incompatible return value type (got "Union[int, _AnyT]", expected "Union[URIRef, Literal, BNode, _AnyT]") [return-value]
1968-
return n # type: ignore[return-value]
1972+
return n
19691973

19701974
def intern(self, something: _AnyT) -> _AnyT:
19711975
return something

test/test_n3.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,23 @@ def test_empty_prefix(self):
251251
g2
252252
), "Document with declared empty prefix must match default #"
253253

254+
@pytest.mark.parametrize(
255+
"do_normalize_literal, expected_result",
256+
[(True, {"1.0", "10000000000.0"}), (False, {"1e10", "1e0"})],
257+
)
258+
def test_float_no_norm(self, do_normalize_literal, expected_result):
259+
import rdflib
260+
261+
original_normalize_literal = rdflib.NORMALIZE_LITERALS
262+
try:
263+
rdflib.NORMALIZE_LITERALS = do_normalize_literal
264+
g1 = Graph()
265+
g1.parse(data=":a :b 1e10, 1e0 .", format="n3")
266+
values = set(str(o) for o in g1.objects())
267+
assert values == expected_result
268+
finally:
269+
rdflib.NORMALIZE_LITERALS = original_normalize_literal
270+
254271

255272
class TestRegularExpressions:
256273
def test_exponents(self):

0 commit comments

Comments
 (0)