fix: raise an exception when serialization format does not support quads

aucampia · aucampia · commit d280f6ed017f · 2023-09-24T19:14:00.000Z
- fixes <#2393>
diff --git a/rdflib/util.py b/rdflib/util.py
@@ -53,7 +53,6 @@
 if TYPE_CHECKING:
     from rdflib.graph import Graph
 
-
 __all__ = [
     "list2set",
     "first",
@@ -615,3 +614,31 @@ def _iri2uri(iri: str) -> str:
         uri += "#"
 
     return uri
+
+
+def _has_non_default_graphs(graph: rdflib.graph.ConjunctiveGraph) -> bool:
+    """
+    Check if the container passed as `graph` contains graphs other than the
+    default graph.
+
+    The intent of this is to detect if the value passed can be serialized using
+    formats which do not support named graphs like N-Triples and Turtle.
+
+    Ideally this function would check if the supplied value contains any named
+    graphs, but RDFLib assigns a name to the default graph, so the best that can
+    be done is to check if the supplied graph contains any graphs other than the
+    default graph.
+
+    If the supplied value contains only the default graph and no other graphs,
+    this function will return `False`, otherwise if the value passed contains at
+    least one graph other than the default graph it will return `True`.
+    """
+    default_identifier = graph.default_context.identifier
+    for context_index, context in enumerate(graph.contexts()):
+        # A context with a different identifier is not the default graph.
+        if context.identifier != default_identifier:
+            return True
+        # Reaching a second context means there is more than the default graph.
+        if context_index > 0:
+            return True
+    return False
diff --git a/test/data/__init__.py b/test/data/__init__.py
diff --git a/test/data/variants/README.md b/test/data/variants/README.md
@@ -1,12 +1,12 @@
 # multi variant graphs
 
-This directory containts variants of the same graph encoded in different
+This directory contains variants of the same graph encoded in different
 formats, or differently in the same format.
 
-The graph that a specific file is a variant of is determined by it's filename.
-Files that differ only in file extention but have the same basename are
+The graph that a specific file is a variant of is determined by its filename.
+Files that differ only in file extensions but have the same basename are
 considered variants of the same graph. Additionally, any suffix that matches
-`-variant-[^/]*` is excluded when determening the graph key, so the following
+`-variant-[^/]*` is excluded when determining the graph key, so the following
 files are all considered variants of the same graph:
 
 ```
diff --git a/test/data/variants/diverse_triples.nq b/test/data/variants/diverse_triples.nq
@@ -0,0 +1,5 @@
+<http://example.com/subject> <http://example.com/predicate> "日本語の表記体系"@jpx .
+<urn:example:subject> <example:predicate> <example:subject> .
+<example:object> <http://example.com/predicate> "XSD string" .
+<example:subject> <example:predicate> <example:object> .
+<example:subject> <example:predicate> "12"^^<http://www.w3.org/2001/XMLSchema#integer> .
diff --git a/test/data/variants/diverse_triples.py b/test/data/variants/diverse_triples.py
@@ -14,5 +14,4 @@ def populate_graph(graph: Graph) -> None:
     graph.add((EGSCHEME.subject, EGSCHEME.predicate, EGSCHEME.object))
     graph.add((EGSCHEME.subject, EGSCHEME.predicate, Literal(12)))
 
-
 __all__ = ["populate_graph"]
diff --git a/test/data/variants/diverse_triples.trig b/test/data/variants/diverse_triples.trig
@@ -0,0 +1,15 @@
+@prefix eghttp: <http://example.com/> .
+@prefix egurn: <urn:example:> .
+@prefix egschema: <example:> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+{
+    eghttp:subject eghttp:predicate "日本語の表記体系"@jpx .
+
+    egurn:subject egschema:predicate egschema:subject .
+}
+
+egschema:object eghttp:predicate "XSD string"^^xsd:string .
+
+egschema:subject egschema:predicate egschema:object,
+        12 .
diff --git a/test/test_dataset/test_dataset.py b/test/test_dataset/test_dataset.py
@@ -3,12 +3,20 @@
 import tempfile
 from test.data import CONTEXT1, LIKES, PIZZA, TAREK
 from test.utils.namespace import EGSCHEME
+import logging
+import os
+import shutil
+import tempfile
+from typing import Optional
+from rdflib.term import Identifier, Literal
 
 import pytest
 
 from rdflib import URIRef, plugin
 from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Dataset, Graph
 from rdflib.store import Store
+from test.utils.namespace import EGDC, EGSCHEME, EGURN
+from rdflib.namespace import XSD
 
 # Will also run SPARQLUpdateStore tests against local SPARQL1.1 endpoint if
 # available. This assumes SPARQL1.1 query/update endpoints running locally at
@@ -261,3 +269,47 @@ def test_subgraph_without_identifier() -> None:
     ) == ("genid", genid_prefix)
 
     assert f"{subgraph.identifier}".startswith(genid_prefix)
+
+
+def test_updating_datatype() -> None:
+    """
+    Clearing the datatype of an ``xsd:string`` literal in place must not change
+    the context identifier reported for the quad that contains the literal.
+    """
+    dataset = Dataset()
+
+    dataset.add((EGSCHEME.subject, EGSCHEME.predicate, EGSCHEME.object))
+
+    egurn_graph = dataset.get_context(EGURN.graph)
+    egurn_graph.add(
+        (EGSCHEME.subject, EGDC.predicate, Literal("XSD string", datatype=XSD.string))
+    )
+
+    def find_literal_obj_ctx_id(
+        dataset: Dataset, literal_value: str
+    ) -> Optional[Identifier]:
+        """
+        Return the fourth element (the context identifier) of the first quad
+        whose object is a literal with the given value, or `None` if none match.
+        """
+        for quad in dataset.quads((None, None, None, None)):
+            if isinstance(quad[2], Literal) and quad[2].value == literal_value:
+                logging.debug("quad = %s", quad)
+                return quad[3]
+        return None
+
+    assert find_literal_obj_ctx_id(dataset, "XSD string") == EGURN.graph
+
+    for context in dataset.contexts():
+        logging.debug("context.identifier = %s", context.identifier)
+        for triple in context:
+            logging.debug("triple = %s", triple)
+            obj = triple[2]
+            if not isinstance(obj, Literal) or obj.datatype is None:
+                continue
+            logging.debug("object.datatype = %s", obj.datatype)
+            if obj.datatype == XSD.string:
+                # Mutate the literal in place through its private attribute.
+                obj._datatype = None
+
+    assert find_literal_obj_ctx_id(dataset, "XSD string") == EGURN.graph
diff --git a/test/test_serializers/test_serialize_context_aware.py b/test/test_serializers/test_serialize_context_aware.py
@@ -0,0 +1,53 @@
+import itertools
+import logging
+from test.utils.graph import GRAPH_FORMATS, GraphType
+from test.utils.variants import load_pyvariant
+from typing import Dict, Iterable, Type
+
+import pytest
+from _pytest.mark.structures import ParameterSet
+from _pytest.outcomes import Failed
+
+from rdflib.graph import ConjunctiveGraph, Dataset, Graph
+
+
+def make_quads_in_triples_cases() -> Iterable[ParameterSet]:
+    """
+    Yield one case per graph type and serializer of every format that supports
+    only triples; each case is marked as an expected `Failed` failure.
+    """
+    formats_without_quads = [
+        candidate
+        for candidate in GRAPH_FORMATS
+        if candidate.info.graph_types == {GraphType.TRIPLE}
+    ]
+    container_types = (ConjunctiveGraph, Dataset)
+    for graph_type, graph_format in itertools.product(
+        container_types, formats_without_quads
+    ):
+        for serializer in graph_format.info.serializers:
+            expected_failure = pytest.mark.xfail(raises=Failed)
+            yield pytest.param(graph_type, serializer, marks=expected_failure)
+
+
+CONJUNCTIVE_GRAPH_WITH_QUADS = load_pyvariant("diverse_quads", ConjunctiveGraph)
+DATASET_WITH_QUADS = load_pyvariant("diverse_quads", Dataset)
+
+GRAPHS: Dict[Type[Graph], Graph] = {
+    ConjunctiveGraph: CONJUNCTIVE_GRAPH_WITH_QUADS,
+    Dataset: DATASET_WITH_QUADS,
+}
+
+
+@pytest.mark.parametrize(["graph_type", "serializer"], make_quads_in_triples_cases())
+def test_quads_in_triples(graph_type: Type[ConjunctiveGraph], serializer: str) -> None:
+    """
+    A graph container holding quads must raise an exception when it is
+    serialized with a serializer for a format that only supports triples.
+    """
+    quad_graph = GRAPHS[graph_type]
+    assert type(quad_graph) is graph_type
+    with pytest.raises(Exception) as caught:
+        quad_graph.serialize(format=serializer)
+    error = caught.value
+    logging.debug("caught.value = %r", error, exc_info=error)
diff --git a/test/test_util.py b/test/test_util.py
@@ -1,21 +1,37 @@
 from __future__ import annotations
 
+import itertools
+from json import load
 import logging
 import time
 from contextlib import ExitStack
 from pathlib import Path
 from test.data import TEST_DATA_DIR
-from test.utils.graph import cached_graph
+from test.utils.graph import cached_graph, load_sources
 from test.utils.namespace import RDFT
-from typing import Any, Collection, List, Optional, Set, Tuple, Type, Union
+from test.utils.outcome import ExpectedOutcome, ValueOutcome
+from typing import Any, Collection, Iterable, List, Optional, Set, Tuple, Type, Union
 
 import pytest
+from _pytest.mark.structures import ParameterSet
 
 from rdflib import XSD, util
-from rdflib.graph import ConjunctiveGraph, Graph, QuotedGraph
+from rdflib.graph import (
+    ConjunctiveGraph,
+    Dataset,
+    Graph,
+    QuotedGraph,
+    _ConjunctiveGraphT,
+)
 from rdflib.namespace import RDF, RDFS
 from rdflib.term import BNode, IdentifiedNode, Literal, Node, URIRef
-from rdflib.util import _coalesce, _iri2uri, find_roots, get_tree
+from rdflib.util import (
+    _coalesce,
+    _has_non_default_graphs,
+    _iri2uri,
+    find_roots,
+    get_tree,
+)
 
 n3source = """\
 @prefix : <http://www.w3.org/2000/10/swap/Primer#>.
@@ -672,3 +688,57 @@ def test_iri2uri(iri: str, expected_result: Union[Set[str], Type[Exception]]) ->
     else:
         assert isinstance(expected_result, set)
         assert result in expected_result
+
+
+def make_has_non_default_graphs_cases() -> Iterable[ParameterSet]:
+    """
+    Generate test cases for `_has_non_default_graphs`.
+
+    Every source file is combined with both `ConjunctiveGraph` and `Dataset`.
+    Sources listed as having named graphs are expected to yield `True`, and
+    sources listed as not having named graphs are expected to yield `False`.
+
+    :return: parameter sets of source path, graph type and expected outcome.
+    """
+    with_named_graphs = [
+        TEST_DATA_DIR / "variants" / "diverse_quads.trig",
+        TEST_DATA_DIR / "variants" / "diverse_quads.py",
+        TEST_DATA_DIR / "variants" / "simple_quad.nq",
+        TEST_DATA_DIR / "variants" / "simple_quad.py",
+    ]
+    without_named_graphs = [
+        TEST_DATA_DIR / "variants" / "diverse_triples.ttl",
+        TEST_DATA_DIR / "variants" / "diverse_triples.py",
+        TEST_DATA_DIR / "variants" / "simple_triple.nt",
+        TEST_DATA_DIR / "variants" / "simple_triple.trig",
+        TEST_DATA_DIR / "variants" / "simple_triple.py",
+    ]
+
+    # Cases for sources with named graphs come first, then those without.
+    for graph_paths, expected in (
+        (with_named_graphs, True),
+        (without_named_graphs, False),
+    ):
+        for graph_path, graph_type in itertools.product(
+            graph_paths, (ConjunctiveGraph, Dataset)
+        ):
+            relative_path = graph_path.relative_to(TEST_DATA_DIR)
+            yield pytest.param(
+                graph_path,
+                graph_type,
+                ValueOutcome(expected),
+                id=f"{relative_path}-{graph_type.__name__}-{expected}",
+            )
+
+
+@pytest.mark.parametrize(
+    ["source", "graph_type", "expected_outcome"], make_has_non_default_graphs_cases()
+)
+def test_has_non_default_graphs(
+    source: Path,
+    graph_type: Type[_ConjunctiveGraphT],
+    expected_outcome: ExpectedOutcome[bool],
+) -> None:
+    with expected_outcome.check_raises():
+        loaded_graph = load_sources(source, graph_type=graph_type)
+        expected_outcome.check_value(_has_non_default_graphs(loaded_graph))
diff --git a/test/utils/variants.py b/test/utils/variants.py
@@ -0,0 +1,34 @@
+"""
+Functionality for interacting with graph variants in `test/data/variants`.
+"""
+
+from functools import lru_cache
+from importlib import import_module
+from typing import Type
+
+from rdflib.graph import Graph, _GraphT
+
+
+def parse_pyvariant(variant_name: str, target: Graph) -> None:
+    """
+    Populate the target graph from the Python graph variant with the given name.
+
+    :param variant_name: the name of the variant module in `test.data.variants`
+    :param target: the graph that the variant module populates
+    """
+    variant_module = import_module(f"test.data.variants.{variant_name}")
+    populate = variant_module.populate_graph
+    populate(target)
+
+
+@lru_cache(maxsize=None)
+def load_pyvariant(variant_name: str, graph_type: Type[_GraphT]) -> _GraphT:
+    """
+    Return a cached graph of the given type populated from the named variant.
+
+    :param variant_name: the name of the graph variant to load
+    :param graph_type: the graph class to instantiate and populate
+    """
+    graph = graph_type()
+    parse_pyvariant(variant_name, graph)
+    return graph