Skip to content

Commit d280f6e

Browse files
committed
fix: raise an exception when serialization format does not support quads
- fixes <#2393> checkpoint checkpoint checkpoint
1 parent d1d87d9 commit d280f6e

File tree

10 files changed

+265
-10
lines changed

10 files changed

+265
-10
lines changed

rdflib/util.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
if TYPE_CHECKING:
5454
from rdflib.graph import Graph
5555

56-
5756
__all__ = [
5857
"list2set",
5958
"first",
@@ -615,3 +614,31 @@ def _iri2uri(iri: str) -> str:
615614
uri += "#"
616615

617616
return uri
617+
618+
619+
def _has_non_default_graphs(graph: rdflib.graph.ConjunctiveGraph) -> bool:
    """
    Check if the container passed as `graph` contains graphs other than the
    default graph.

    The intent of this is to detect if the value passed can be serialized using
    formats which do not support named graphs like N-Triples and Turtle.

    Ideally this function would check if the supplied value contains any named
    graphs, but RDFLib assigns a name to the default graph, so the best that can
    be done is to check if the supplied graph contains any graphs other than the
    default graph.

    :param graph: The graph container whose contexts are inspected.
    :return: `False` if the supplied value contains no graphs, or only the
        default graph and no other graphs; `True` if it contains at least one
        graph other than the default graph.
    """
    default_identifier = graph.default_context.identifier
    for context_index, context in enumerate(graph.contexts()):
        # Any context beyond the first is reported as non-default regardless of
        # its identifier; the first one only if its identifier differs from
        # that of the default context.
        if context_index > 0 or context.identifier != default_identifier:
            return True
    return False
File renamed without changes.

test/data/variants/README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# multi variant graphs
22

3-
This directory containts variants of the same graph encoded in different
3+
This directory contains variants of the same graph encoded in different
44
formats, or differently in the same format.
55

6-
The graph that a specific file is a variant of is determined by it's filename.
7-
Files that differ only in file extention but have the same basename are
6+
The graph that a specific file is a variant of is determined by its filename.
7+
Files that differ only in file extensions but have the same basename are
88
considered variants of the same graph. Additionally, any suffix that matches
9-
`-variant-[^/]*` is excluded when determening the graph key, so the following
9+
`-variant-[^/]*` is excluded when determining the graph key, so the following
1010
files are all considered variants of the same graph:
1111

1212
```

test/data/variants/diverse_triples.nq

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<http://example.com/subject> <http://example.com/predicate> "日本語の表記体系"@jpx .
2+
<urn:example:subject> <example:predicate> <example:subject> .
3+
<example:object> <http://example.com/predicate> "XSD string" .
4+
<example:subject> <example:predicate> <example:object> .
5+
<example:subject> <example:predicate> "12"^^<http://www.w3.org/2001/XMLSchema#integer> .

test/data/variants/diverse_triples.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,4 @@ def populate_graph(graph: Graph) -> None:
1414
graph.add((EGSCHEME.subject, EGSCHEME.predicate, EGSCHEME.object))
1515
graph.add((EGSCHEME.subject, EGSCHEME.predicate, Literal(12)))
1616

17-
1817
__all__ = ["populate_graph"]
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
@prefix eghttp: <http://example.com/> .
2+
@prefix egurn: <urn:example:> .
3+
@prefix egschema: <example:> .
4+
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
5+
6+
{
7+
eghttp:subject eghttp:predicate "日本語の表記体系"@jpx .
8+
9+
egurn:subject egschema:predicate egschema:subject .
10+
}
11+
12+
egschema:object eghttp:predicate "XSD string"^^xsd:string .
13+
14+
egschema:subject egschema:predicate egschema:object,
15+
12 .

test/test_dataset/test_dataset.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,20 @@
33
import tempfile
44
from test.data import CONTEXT1, LIKES, PIZZA, TAREK
55
from test.utils.namespace import EGSCHEME
6+
import logging
7+
import os
8+
import shutil
9+
import tempfile
10+
from typing import Optional
11+
from rdflib.term import Identifier, Literal
612

713
import pytest
814

915
from rdflib import URIRef, plugin
1016
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Dataset, Graph
1117
from rdflib.store import Store
18+
from test.utils.namespace import EGDC, EGSCHEME, EGURN
19+
from rdflib.namespace import XSD
1220

1321
# Will also run SPARQLUpdateStore tests against local SPARQL1.1 endpoint if
1422
# available. This assumes SPARQL1.1 query/update endpoints running locally at
@@ -261,3 +269,47 @@ def test_subgraph_without_identifier() -> None:
261269
) == ("genid", genid_prefix)
262270

263271
assert f"{subgraph.identifier}".startswith(genid_prefix)
272+
273+
274+
def test_updating_datatype() -> None:
    """
    Changing the datatype of a literal in place should not change the context
    that the quad holding the literal is reported in.
    """
    dataset = Dataset()

    dataset.add((EGSCHEME.subject, EGSCHEME.predicate, EGSCHEME.object))

    egurn_graph = dataset.get_context(EGURN.graph)
    egurn_graph.add(
        (EGSCHEME.subject, EGDC.predicate, Literal("XSD string", datatype=XSD.string))
    )

    def find_literal_obj_ctx_id(
        dataset: Dataset, literal_value: str
    ) -> Optional[Identifier]:
        """
        Return the context identifier of the first quad whose object is a
        literal with the given value, or `None` if there is no such quad.
        """
        for quad in dataset.quads((None, None, None, None)):
            if isinstance(quad[2], Literal) and quad[2].value == literal_value:
                logging.debug("quad = %s", quad)
                return quad[3]
        return None

    assert find_literal_obj_ctx_id(dataset, "XSD string") == EGURN.graph

    for context in dataset.contexts():
        logging.debug("context.identifier = %s", context.identifier)
        for triple in context:
            logging.debug("triple = %s", triple)
            obj = triple[2]
            if not isinstance(obj, Literal):
                continue
            if obj.datatype is None:
                continue
            logging.debug("object.datatype = %s", obj.datatype)
            if obj.datatype == XSD.string:
                # The literal is mutated in place through its private
                # attribute; the term stored in the dataset is not replaced.
                obj._datatype = None

    assert find_literal_obj_ctx_id(dataset, "XSD string") == EGURN.graph
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import itertools
2+
import logging
3+
from test.utils.graph import GRAPH_FORMATS, GraphType
4+
from test.utils.variants import load_pyvariant
5+
from typing import Dict, Iterable, Type
6+
7+
import pytest
8+
from _pytest.mark.structures import ParameterSet
9+
from _pytest.outcomes import Failed
10+
11+
from rdflib.graph import ConjunctiveGraph, Dataset, Graph
12+
13+
14+
def make_quads_in_triples_cases() -> Iterable[ParameterSet]:
    """
    Yield one test case per combination of quad-capable graph class
    (`ConjunctiveGraph`, `Dataset`) and serializer of a format whose only
    supported graph type is triples.

    Every case is marked as an expected failure that raises `Failed`.
    """
    triple_only_formats = []
    for candidate_format in GRAPH_FORMATS:
        if candidate_format.info.graph_types == {GraphType.TRIPLE}:
            triple_only_formats.append(candidate_format)

    for graph_type in (ConjunctiveGraph, Dataset):
        for triple_format in triple_only_formats:
            for serializer in triple_format.info.serializers:
                expected_failure = pytest.mark.xfail(raises=Failed)
                yield pytest.param(graph_type, serializer, marks=expected_failure)
31+
32+
33+
# Name of the python graph variant that is loaded into both container types.
_QUADS_VARIANT = "diverse_quads"

CONJUNCTIVE_GRAPH_WITH_QUADS = load_pyvariant(_QUADS_VARIANT, ConjunctiveGraph)
DATASET_WITH_QUADS = load_pyvariant(_QUADS_VARIANT, Dataset)

# Lookup from graph class to the pre-loaded graph instance of that class.
GRAPHS: Dict[Type[Graph], Graph] = {
    ConjunctiveGraph: CONJUNCTIVE_GRAPH_WITH_QUADS,
    Dataset: DATASET_WITH_QUADS,
}
40+
41+
42+
@pytest.mark.parametrize(["graph_type", "serializer"], make_quads_in_triples_cases())
def test_quads_in_triples(graph_type: Type[ConjunctiveGraph], serializer: str) -> None:
    """
    Serializing a `ConjunctiveGraph` or `Dataset` that holds named graphs
    (i.e. quads) into a format that does not support named graphs should
    result in an exception.
    """
    container = GRAPHS[graph_type]
    assert type(container) is graph_type
    with pytest.raises(Exception) as caught:
        container.serialize(format=serializer)

    error = caught.value
    logging.debug("caught.value = %r", error, exc_info=error)

test/test_util.py

Lines changed: 74 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,37 @@
11
from __future__ import annotations
22

3+
import itertools
4+
from json import load
35
import logging
46
import time
57
from contextlib import ExitStack
68
from pathlib import Path
79
from test.data import TEST_DATA_DIR
8-
from test.utils.graph import cached_graph
10+
from test.utils.graph import cached_graph, load_sources
911
from test.utils.namespace import RDFT
10-
from typing import Any, Collection, List, Optional, Set, Tuple, Type, Union
12+
from test.utils.outcome import ExpectedOutcome, ValueOutcome
13+
from typing import Any, Collection, Iterable, List, Optional, Set, Tuple, Type, Union
1114

1215
import pytest
16+
from _pytest.mark.structures import ParameterSet
1317

1418
from rdflib import XSD, util
15-
from rdflib.graph import ConjunctiveGraph, Graph, QuotedGraph
19+
from rdflib.graph import (
20+
ConjunctiveGraph,
21+
Dataset,
22+
Graph,
23+
QuotedGraph,
24+
_ConjunctiveGraphT,
25+
)
1626
from rdflib.namespace import RDF, RDFS
1727
from rdflib.term import BNode, IdentifiedNode, Literal, Node, URIRef
18-
from rdflib.util import _coalesce, _iri2uri, find_roots, get_tree
28+
from rdflib.util import (
29+
_coalesce,
30+
_has_non_default_graphs,
31+
_iri2uri,
32+
find_roots,
33+
get_tree,
34+
)
1935

2036
n3source = """\
2137
@prefix : <http://www.w3.org/2000/10/swap/Primer#>.
@@ -672,3 +688,57 @@ def test_iri2uri(iri: str, expected_result: Union[Set[str], Type[Exception]]) ->
672688
else:
673689
assert isinstance(expected_result, set)
674690
assert result in expected_result
691+
692+
693+
def make_has_non_default_graphs_cases() -> Iterable[ParameterSet]:
    """
    Yield test cases for `_has_non_default_graphs`.

    Each case consists of the path to a graph variant file, the graph class to
    load it into (`ConjunctiveGraph` or `Dataset`) and the expected outcome:
    `True` for the variants listed as having named graphs and `False` for the
    variants listed as not having them.
    """
    variants_dir = TEST_DATA_DIR / "variants"
    file_names_by_expectation = (
        (
            True,
            (
                "diverse_quads.trig",
                "diverse_quads.py",
                "simple_quad.nq",
                "simple_quad.py",
            ),
        ),
        (
            False,
            (
                "diverse_triples.ttl",
                "diverse_triples.py",
                "simple_triple.nt",
                "simple_triple.trig",
                "simple_triple.py",
            ),
        ),
    )

    for expected, file_names in file_names_by_expectation:
        for file_name, graph_type in itertools.product(
            file_names, (ConjunctiveGraph, Dataset)
        ):
            graph_path = variants_dir / file_name
            yield pytest.param(
                graph_path,
                graph_type,
                ValueOutcome(expected),
                id=f"{graph_path.relative_to(TEST_DATA_DIR)}-{graph_type.__name__}-{expected}",
            )
732+
733+
734+
@pytest.mark.parametrize(
    ("source", "graph_type", "expected_outcome"),
    make_has_non_default_graphs_cases(),
)
def test_has_non_default_graphs(
    source: Path,
    graph_type: Type[_ConjunctiveGraphT],
    expected_outcome: ExpectedOutcome[bool],
) -> None:
    """
    Load `source` into a graph of type `graph_type` and check that
    `_has_non_default_graphs` gives the expected outcome for it.
    """
    with expected_outcome.check_raises():
        loaded_graph = load_sources(source, graph_type=graph_type)
        actual = _has_non_default_graphs(loaded_graph)
        expected_outcome.check_value(actual)

test/utils/variants.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
"""
2+
Functionality for interacting with graph variants in `test/data/variants`.
3+
"""
4+
5+
from functools import lru_cache
6+
from importlib import import_module
7+
from typing import Type
8+
9+
from rdflib.graph import Graph, _GraphT
10+
11+
12+
def parse_pyvariant(variant_name: str, target: Graph) -> None:
    """
    Populate `target` from the python graph variant called `variant_name`.

    The module `test.data.variants.<variant_name>` is imported and its
    `populate_graph` function is called with `target`.

    :param variant_name: the name of the graph variant to parse
    :param target: the graph to parse the variant into
    """
    variant_module = import_module(f"test.data.variants.{variant_name}")
    variant_module.populate_graph(target)
22+
23+
24+
@lru_cache(maxsize=None)
def load_pyvariant(variant_name: str, graph_type: Type[_GraphT]) -> _GraphT:
    """
    Load the graph variant with the given name into a new graph.

    Results are cached by `lru_cache` without a size limit, so calling this
    again with the same arguments returns the same graph object rather than a
    fresh copy.

    :param variant_name: the name of the graph variant to load
    :param graph_type: the graph class to instantiate and load the variant into
    :return: the loaded graph variant
    """
    loaded = graph_type()
    parse_pyvariant(variant_name, loaded)
    return loaded

0 commit comments

Comments
 (0)