notation3.py: don't normalize float representation (#3020)

tgbugs · nicholascar · edmondchuc · web-flow · commit e8243f7dc598 · 2025-09-18T14:19:42.000+10:00
* added n3 test to check for internal float normalization; made as a separate commit to illustrate the old broken behavior prior to the fix in the next commit * notation3.py: don't normalize float representation; fix behavior of the n3 parser family to avoid normalizing the raw float string representation, which makes it impossible to roundtrip the exact original string representation of e.g. 1e10 * [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci * style: add noqa to sfloat class * chore: remove unused mypy type ignore * test: refactor test_float_no_norm to use pytest parametrization --------- Co-authored-by: Nicholas Car <nick@kurrawong.net> Co-authored-by: Edmond Chuc <37032744+edmondchuc@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Edmond Chuc <edmond@kurrawong.ai>
diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py
@@ -384,6 +384,10 @@ def unicodeExpand(m: Match) -> str:
 langcode = re.compile(r"[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*")
 
 
+class sfloat(str):  # noqa: N801
+    """don't normalize raw XSD.double string representation"""
+
+
 class SinkParser:
     def __init__(
         self,
@@ -1530,7 +1534,7 @@ def nodeOrLiteral(self, argstr: str, i: int, res: MutableSequence[Any]) -> int:
                 m = exponent_syntax.match(argstr, i)
                 if m:
                     j = m.end()
-                    res.append(float(argstr[i:j]))
+                    res.append(sfloat(argstr[i:j]))
                     return j
 
                 m = decimal_syntax.match(argstr, i)
@@ -1921,15 +1925,15 @@ def normalise(self, f: Formula | Graph | None, n: int) -> Literal: ...
     def normalise(self, f: Formula | Graph | None, n: Decimal) -> Literal: ...
 
     @overload
-    def normalise(self, f: Formula | Graph | None, n: float) -> Literal: ...
+    def normalise(self, f: Formula | Graph | None, n: sfloat) -> Literal: ...
 
     @overload
     def normalise(self, f: Formula | Graph | None, n: Node) -> Node: ...
 
     def normalise(
         self,
         f: Formula | Graph | None,
-        n: Union[tuple[int, str], bool, int, Decimal, float, Node, _AnyT],
+        n: Union[tuple[int, str], bool, int, Decimal, sfloat, Node, _AnyT],
     ) -> Union[URIRef, Literal, BNode, Node, _AnyT]:
         if isinstance(n, tuple):
             return URIRef(str(n[1]))
@@ -1949,7 +1953,7 @@ def normalise(
             s = Literal(value, datatype=DECIMAL_DATATYPE)
             return s
 
-        if isinstance(n, float):
+        if isinstance(n, sfloat):
             s = Literal(str(n), datatype=DOUBLE_DATATYPE)
             return s
 
@@ -1965,7 +1969,7 @@ def normalise(
         #    f.universals[n] = f.newBlankNode()
         #    return f.universals[n]
         # type error: Incompatible return value type (got "Union[int, _AnyT]", expected "Union[URIRef, Literal, BNode, _AnyT]")  [return-value]
-        return n  # type: ignore[return-value]
+        return n
 
     def intern(self, something: _AnyT) -> _AnyT:
         return something
diff --git a/test/test_n3.py b/test/test_n3.py
@@ -251,6 +251,23 @@ def test_empty_prefix(self):
             g2
         ), "Document with declared empty prefix must match default #"
 
+    @pytest.mark.parametrize(
+        "do_normalize_literal, expected_result",
+        [(True, {"1.0", "10000000000.0"}), (False, {"1e10", "1e0"})],
+    )
+    def test_float_no_norm(self, do_normalize_literal, expected_result):
+        import rdflib
+
+        original_normalize_literal = rdflib.NORMALIZE_LITERALS
+        try:
+            rdflib.NORMALIZE_LITERALS = do_normalize_literal
+            g1 = Graph()
+            g1.parse(data=":a :b 1e10, 1e0 .", format="n3")
+            values = set(str(o) for o in g1.objects())
+            assert values == expected_result
+        finally:
+            rdflib.NORMALIZE_LITERALS = original_normalize_literal
+
 
 class TestRegularExpressions:
     def test_exponents(self):