diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 18cc4b14..e11b9ffa 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,11 @@ CHANGELOG ********* +`v1.0.17`_ (2019-12-22) +======================= +* Enhancement of validation-only speed (~15%) +* Added *is_valid()* and *iter_errors()* to module API + `v1.0.16`_ (2019-11-18) ======================= * Improved XMLResource class for working with compressed files @@ -272,3 +277,4 @@ v0.9.6 (2017-05-05) .. _v1.0.14: https://github.com/brunato/xmlschema/compare/v1.0.13...v1.0.14 .. _v1.0.15: https://github.com/brunato/xmlschema/compare/v1.0.14...v1.0.15 .. _v1.0.16: https://github.com/brunato/xmlschema/compare/v1.0.15...v1.0.16 +.. _v1.0.17: https://github.com/brunato/xmlschema/compare/v1.0.16...v1.0.17 diff --git a/doc/api.rst b/doc/api.rst index 9e57b7c5..d533a1f1 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -7,6 +7,8 @@ Document level API ------------------ .. autofunction:: xmlschema.validate +.. autofunction:: xmlschema.is_valid +.. autofunction:: xmlschema.iter_errors .. autofunction:: xmlschema.to_dict .. autofunction:: xmlschema.to_json .. autofunction:: xmlschema.from_json @@ -47,6 +49,7 @@ Schema level API .. automethod:: create_schema .. automethod:: create_any_content_group .. automethod:: create_any_attribute_group + .. automethod:: create_any_type .. automethod:: get_locations .. automethod:: include_schema diff --git a/doc/conf.py b/doc/conf.py index 90a672db..53a49434 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -62,7 +62,7 @@ # The short X.Y version. version = '1.0' # The full version, including alpha/beta/rc tags. -release = '1.0.16' +release = '1.0.17' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/publiccode.yml b/publiccode.yml index 32fc8dc0..77d0259e 100644 --- a/publiccode.yml +++ b/publiccode.yml @@ -6,8 +6,8 @@ publiccodeYmlVersion: '0.2' name: xmlschema url: 'https://github.com/sissaschool/xmlschema' landingURL: 'https://github.com/sissaschool/xmlschema' -releaseDate: '2019-11-18' -softwareVersion: v1.0.16 +releaseDate: '2019-12-22' +softwareVersion: v1.0.17 developmentStatus: stable platforms: - linux diff --git a/setup.py b/setup.py index 94be791e..520d5c76 100755 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ def run(self): setup( name='xmlschema', - version='1.0.16', + version='1.0.17', setup_requires=['elementpath~=1.3.0'], install_requires=['elementpath~=1.3.0'], packages=['xmlschema'], diff --git a/xmlschema/__init__.py b/xmlschema/__init__.py index cfcf02e9..346076a6 100644 --- a/xmlschema/__init__.py +++ b/xmlschema/__init__.py @@ -21,7 +21,7 @@ ElementData, XMLSchemaConverter, UnorderedConverter, ParkerConverter, BadgerFishConverter, AbderaConverter, JsonMLConverter ) -from .documents import validate, to_dict, to_json, from_json +from .documents import validate, is_valid, iter_errors, to_dict, to_json, from_json from .validators import ( XMLSchemaValidatorError, XMLSchemaParseError, XMLSchemaNotBuiltError, @@ -31,7 +31,7 @@ XsdGlobals, XMLSchemaBase, XMLSchema, XMLSchema10, XMLSchema11 ) -__version__ = '1.0.16' +__version__ = '1.0.17' __author__ = "Davide Brunato" __contact__ = "brunato@sissa.it" __copyright__ = "Copyright 2016-2019, SISSA" diff --git a/xmlschema/converters.py b/xmlschema/converters.py index 3570d47f..f8b7bfc2 100644 --- a/xmlschema/converters.py +++ b/xmlschema/converters.py @@ -264,7 +264,8 @@ def element_decode(self, data, xsd_element, level=0): if level == 0 and xsd_element.is_global() and not self.strip_namespaces and self: schema_namespaces = set(xsd_element.namespaces.values()) result_dict.update( - ('%s:%s' % (self.ns_prefix, k) if k else self.ns_prefix, v) for k, v in self.items() + ('%s:%s' % 
(self.ns_prefix, k) if k else self.ns_prefix, v) + for k, v in self._namespaces.items() if v in schema_namespaces or v == XSI_NAMESPACE ) @@ -325,6 +326,8 @@ def element_encode(self, obj, xsd_element, level=0): if not isinstance(obj, (self.dict, dict)): if xsd_element.type.is_simple() or xsd_element.type.has_simple_content(): return ElementData(tag, obj, None, {}) + elif xsd_element.type.mixed and not isinstance(obj, list): + return ElementData(tag, obj, None, {}) else: return ElementData(tag, None, obj, {}) @@ -900,7 +903,9 @@ def element_decode(self, data, xsd_element, level=0): ]) if level == 0 and xsd_element.is_global() and not self.strip_namespaces and self: - attributes.update([('xmlns:%s' % k if k else 'xmlns', v) for k, v in self.items()]) + attributes.update( + [('xmlns:%s' % k if k else 'xmlns', v) for k, v in self._namespaces.items()] + ) if attributes: result_list.insert(1, attributes) return result_list @@ -913,7 +918,7 @@ def element_encode(self, obj, xsd_element, level=0): data_len = len(obj) if data_len == 1: - if not xsd_element.is_matching(unmap_qname(obj[0]), self.get('')): + if not xsd_element.is_matching(unmap_qname(obj[0]), self._namespaces.get('')): raise XMLSchemaValueError("Unmatched tag") return ElementData(xsd_element.name, None, None, attributes) @@ -930,7 +935,7 @@ def element_encode(self, obj, xsd_element, level=0): else: content_index = 2 - if not xsd_element.is_matching(unmap_qname(obj[0]), self.get('')): + if not xsd_element.is_matching(unmap_qname(obj[0]), self._namespaces.get('')): raise XMLSchemaValueError("Unmatched tag") if data_len <= content_index: diff --git a/xmlschema/documents.py b/xmlschema/documents.py index 02e64718..007f003b 100644 --- a/xmlschema/documents.py +++ b/xmlschema/documents.py @@ -13,17 +13,20 @@ from .compat import ordered_dict_class from .resources import fetch_schema_locations, XMLResource -from .validators.schema import XMLSchema, XMLSchemaBase +from .validators import XMLSchema, XMLSchemaBase def 
get_context(source, schema=None, cls=None, locations=None, base_url=None, defuse='remote', timeout=300, lazy=False): """ - Helper method for obtaining XML document validation/decode context. - Return an XMLResource instance and a schema instance. + Get the XML document validation/decode context. + + :return: an XMLResource instance and a schema instance. """ if cls is None: cls = XMLSchema + if not isinstance(source, XMLResource): + source = XMLResource(source, base_url, defuse=defuse, timeout=timeout, lazy=lazy) try: schema, locations = fetch_schema_locations(source, locations, base_url=base_url) @@ -36,9 +39,6 @@ def get_context(source, schema=None, cls=None, locations=None, base_url=None, else: schema = cls(schema, validation='strict', locations=locations, defuse=defuse, timeout=timeout) - if not isinstance(source, XMLResource): - source = XMLResource(source, defuse=defuse, timeout=timeout, lazy=lazy) - return source, schema @@ -75,6 +75,26 @@ def validate(xml_document, schema=None, cls=None, path=None, schema_path=None, u schema.validate(source, path, schema_path, use_defaults, namespaces) +def is_valid(xml_document, schema=None, cls=None, path=None, schema_path=None, use_defaults=True, + namespaces=None, locations=None, base_url=None, defuse='remote', timeout=300, lazy=False): + """ + Like :meth:`validate` except that it does not raise an exception but returns ``True`` if + the XML document is valid, ``False`` if it's invalid. + """ + source, schema = get_context(xml_document, schema, cls, locations, base_url, defuse, timeout, lazy) + return schema.is_valid(source, path, schema_path, use_defaults, namespaces) + + +def iter_errors(xml_document, schema=None, cls=None, path=None, schema_path=None, use_defaults=True, + namespaces=None, locations=None, base_url=None, defuse='remote', timeout=300, lazy=False): + """ + Creates an iterator for the errors generated by the validation of an XML document. + Takes the same arguments as the function :meth:`validate`. 
+ """ + source, schema = get_context(xml_document, schema, cls, locations, base_url, defuse, timeout, lazy) + return schema.iter_errors(source, path, schema_path, use_defaults, namespaces) + + def to_dict(xml_document, schema=None, cls=None, path=None, process_namespaces=True, locations=None, base_url=None, defuse='remote', timeout=300, lazy=False, **kwargs): """ diff --git a/xmlschema/etree.py b/xmlschema/etree.py index 7c4d28f3..d7a96ebc 100644 --- a/xmlschema/etree.py +++ b/xmlschema/etree.py @@ -25,7 +25,7 @@ from .compat import PY3 from .exceptions import XMLSchemaTypeError from .namespaces import XSLT_NAMESPACE, HFP_NAMESPACE, VC_NAMESPACE, get_namespace -from .qnames import get_qname, qname_to_prefixed +from .qnames import get_qname, qname_to_prefixed, XSI_SCHEMA_LOCATION, XSI_NONS_SCHEMA_LOCATION ### # Programmatic import of xml.etree.ElementTree @@ -263,6 +263,18 @@ def etree_getpath(elem, root, namespaces=None, relative=True, add_position=False return path +def etree_iter_location_hints(elem): + """Yields schema location hints contained in the attributes of an element.""" + if XSI_SCHEMA_LOCATION in elem.attrib: + locations = elem.attrib[XSI_SCHEMA_LOCATION].split() + for ns, url in zip(locations[0::2], locations[1::2]): + yield ns, url + + if XSI_NONS_SCHEMA_LOCATION in elem.attrib: + for url in elem.attrib[XSI_NONS_SCHEMA_LOCATION].split(): + yield '', url + + def etree_elements_assert_equal(elem, other, strict=True, skip_comments=True): """ Tests the equality of two XML Element trees. diff --git a/xmlschema/namespaces.py b/xmlschema/namespaces.py index 67f8e4b6..7bb4a042 100644 --- a/xmlschema/namespaces.py +++ b/xmlschema/namespaces.py @@ -119,28 +119,32 @@ def clear(self): class NamespaceMapper(MutableMapping): """ - A class to map/unmap namespace prefixes to URIs. The + A class to map/unmap namespace prefixes to URIs. The mapped namespaces are + automatically registered when set. 
Namespaces can be updated by overwriting + the existing registration or inserted using an alternative prefix. - :param namespaces: Initial data with namespace prefixes and URIs. + :param namespaces: initial data with namespace prefixes and URIs. + :param register_namespace: a two-argument function for registering namespaces \ + on the ElementTree module. """ def __init__(self, namespaces=None, register_namespace=None): self._namespaces = {} self.register_namespace = register_namespace if namespaces is not None: - self.update(namespaces) + self._namespaces.update(namespaces) - def __getitem__(self, key): - return self._namespaces[key] + def __getitem__(self, prefix): + return self._namespaces[prefix] - def __setitem__(self, key, value): - self._namespaces[key] = value + def __setitem__(self, prefix, uri): + self._namespaces[prefix] = uri try: - self.register_namespace(key, value) + self.register_namespace(prefix, uri) except (TypeError, ValueError): pass - def __delitem__(self, key): - del self._namespaces[key] + def __delitem__(self, prefix): + del self._namespaces[prefix] def __iter__(self): return iter(self._namespaces) @@ -148,6 +152,10 @@ def __iter__(self): def __len__(self): return len(self._namespaces) + @property + def namespaces(self): + return self._namespaces + @property def default_namespace(self): return self._namespaces.get('') @@ -155,6 +163,30 @@ def default_namespace(self): def clear(self): self._namespaces.clear() + def insert_item(self, prefix, uri): + """ + A method for setting an item that checks the prefix before inserting. + In case of collision the prefix is changed by adding a numerical suffix. 
+ """ + if not prefix: + if '' not in self._namespaces: + self._namespaces[prefix] = uri + return + elif self._namespaces[''] == uri: + return + prefix = 'default' + + while prefix in self._namespaces: + if self._namespaces[prefix] == uri: + return + match = re.search(r'(\d+)$', prefix) + if match: + index = int(match.group()) + 1 + prefix = prefix[:match.span()[0]] + str(index) + else: + prefix += '0' + self._namespaces[prefix] = uri + def map_qname(self, qname): """ Converts an extended QName to the prefixed format. Only registered @@ -170,7 +202,7 @@ def map_qname(self, qname): return qname qname_uri = get_namespace(qname) - for prefix, uri in self.items(): + for prefix, uri in self._namespaces.items(): if uri != qname_uri: continue if prefix: diff --git a/xmlschema/resources.py b/xmlschema/resources.py index adb9c02c..7e28c335 100644 --- a/xmlschema/resources.py +++ b/xmlschema/resources.py @@ -19,8 +19,7 @@ ) from .exceptions import XMLSchemaTypeError, XMLSchemaValueError, XMLSchemaURLError, XMLSchemaOSError from .namespaces import get_namespace -from .qnames import XSI_SCHEMA_LOCATION, XSI_NONS_SCHEMA_LOCATION -from .etree import ElementTree, PyElementTree, SafeXMLParser, etree_tostring +from .etree import ElementTree, PyElementTree, SafeXMLParser, etree_tostring, etree_iter_location_hints DEFUSE_MODES = ('always', 'remote', 'never') @@ -156,11 +155,11 @@ def fetch_resource(location, base_url=None, timeout=30): resource = urlopen(url, timeout=timeout) except URLError as err: # fallback joining the path without a base URL - url = normalize_url(location) + alt_url = normalize_url(location) try: - resource = urlopen(url, timeout=timeout) + resource = urlopen(alt_url, timeout=timeout) except URLError: - raise XMLSchemaURLError(reason=err.reason) + raise XMLSchemaURLError("cannot access to resource %r: %s" % (url, err.reason)) else: resource.close() return url @@ -169,21 +168,24 @@ def fetch_resource(location, base_url=None, timeout=30): return url -def 
fetch_schema_locations(source, locations=None, **resource_options): +def fetch_schema_locations(source, locations=None, base_url=None, defuse='remote', timeout=30): """ - Fetches the schema URL for the source's root of an XML data source and a list of location hints. + Fetches schema location hints from an XML data source and a list of location hints. If an accessible schema location is not found raises a ValueError. - :param source: an Element or an Element Tree with XML data or an URL or a file-like object. - :param locations: a dictionary or dictionary items with Schema location hints. - :param resource_options: keyword arguments for providing :class:`XMLResource` class init options. - :return: A tuple with the URL referring to the first reachable schema resource, a list \ - of dictionary items with normalized location hints. + :param source: can be an :class:`XMLResource` instance, a file-like object a path \ + to a file or an URI of a resource or an Element instance or an ElementTree instance or \ + a string containing the XML data. If the passed argument is not an :class:`XMLResource` \ + instance a new one is built using this and *defuse*, *timeout* and *lazy* arguments. + :param locations: a dictionary or dictionary items with additional schema location hints. + :param base_url: the same argument of the :class:`XMLResource`. + :param defuse: the same argument of the :class:`XMLResource`. + :param timeout: the same argument of the :class:`XMLResource` but with a reduced default. + :return: A 2-tuple with the URL referring to the first reachable schema resource \ + and a list of dictionary items with normalized location hints. 
""" - base_url = resource_options.pop('base_url', None) - timeout = resource_options.pop('timeout', 30) if not isinstance(source, XMLResource): - resource = XMLResource(source, base_url, timeout=timeout, **resource_options) + resource = XMLResource(source, base_url, defuse, timeout) else: resource = source @@ -203,39 +205,30 @@ def fetch_schema_locations(source, locations=None, **resource_options): raise XMLSchemaValueError("not found a schema for XML data resource {!r}.".format(source)) -def fetch_schema(source, locations=None, **resource_options): +def fetch_schema(source, locations=None, base_url=None, defuse='remote', timeout=30): """ - Fetches the schema URL for the source's root of an XML data source. - If an accessible schema location is not found raises a ValueError. - - :param source: An an Element or an Element Tree with XML data or an URL or a file-like object. - :param locations: A dictionary or dictionary items with schema location hints. - :param resource_options: keyword arguments for providing :class:`XMLResource` class init options. - :return: An URL referring to a reachable schema resource. + Like :meth:`fetch_schema_locations` but returns only a reachable + location hint for a schema related to the source's namespace. """ - return fetch_schema_locations(source, locations, **resource_options)[0] + return fetch_schema_locations(source, locations, base_url, defuse, timeout)[0] -def fetch_namespaces(source, **resource_options): +def fetch_namespaces(source, base_url=None, defuse='remote', timeout=30): """ - Extracts namespaces with related prefixes from the XML data source. If the source is - an lxml's ElementTree/Element returns the nsmap attribute of the root. If a duplicate - prefix declaration is encountered then adds the namespace using a different prefix, - but only in the case if the namespace URI is not already mapped by another prefix. 
- - :param source: a string containing the XML document or file path or an url \ - or a file like object or an ElementTree or Element. - :param resource_options: keyword arguments for providing :class:`XMLResource` init options. - :return: A dictionary for mapping namespace prefixes to full URI. + Fetches namespaces information from the XML data source. The argument *source* + can be a string containing the XML document or file path or an url or a file-like + object or an ElementTree instance or an Element instance. A dictionary with + namespace mappings is returned. """ - timeout = resource_options.pop('timeout', 30) - return XMLResource(source, timeout=timeout, **resource_options).get_namespaces() + resource = XMLResource(source, base_url, defuse, timeout) + return resource.get_namespaces() def load_xml_resource(source, element_only=True, **resource_options): """ Load XML data source into an Element tree, returning the root Element, the XML text and an url, if available. Usable for XML data files of small or medium sizes, as XSD schemas. + This helper function is deprecated from v1.0.17, use :class:`XMLResource` instead. :param source: an URL, a filename path or a file-like object. :param element_only: if True the function returns only the root Element of the tree. @@ -243,6 +236,10 @@ def load_xml_resource(source, element_only=True, **resource_options): :return: a tuple with three items (root Element, XML text and XML URL) or \ only the root Element if 'element_only' argument is True. """ + import warnings + warnings.warn("load_xml_resource() function will be removed in 1.1 version", + DeprecationWarning, stacklevel=2) + lazy = resource_options.pop('lazy', False) source = XMLResource(source, lazy=lazy, **resource_options) if element_only: @@ -256,22 +253,20 @@ class XMLResource(object): """ XML resource reader based on ElementTree and urllib. 
- :param source: a string containing the XML document or file path or an URL or a file like \ - object or an ElementTree or an Element. - :param base_url: is an optional base URL, used for the normalization of relative paths when \ - the URL of the resource can't be obtained from the source argument. - :param defuse: set the usage of SafeXMLParser for XML data. Can be 'always', 'remote' or 'never'. \ - Default is 'remote' that uses the defusedxml only when loading remote data. + :param source: a string containing the XML document or file path or an URL or a \ + file like object or an ElementTree or an Element. + :param base_url: is an optional base URL, used for the normalization of relative paths \ + when the URL of the resource can't be obtained from the source argument. + :param defuse: set the usage of SafeXMLParser for XML data. Can be 'always', 'remote' \ + or 'never'. Default is 'remote' that uses the defusedxml only when loading remote data. :param timeout: the timeout in seconds for the connection attempt in case of remote data. - :param lazy: if set to `False` the source is fully loaded into and processed from memory. \ - Default is `True` that means that only the root element of the source is loaded. This is \ - ignored if *source* is an Element or an ElementTree. + :param lazy: if a value `False` is provided the XML data is fully loaded into and \ + processed from memory. For default only the root element of the source is loaded, \ + except in case the *source* argument is an Element or an ElementTree instance. 
""" - def __init__(self, source, base_url=None, defuse='remote', timeout=300, lazy=True): - if base_url is not None and not isinstance(base_url, string_base_type): - raise XMLSchemaValueError(u"'base_url' argument has to be a string: {!r}".format(base_url)) + _root = _text = _url = None - self._root = self._text = self._url = None + def __init__(self, source, base_url=None, defuse='remote', timeout=300, lazy=True): self._base_url = base_url self.defuse = defuse self.timeout = timeout @@ -301,12 +296,27 @@ def __repr__(self): def __setattr__(self, name, value): if name == 'source': self._root, self._text, self._url = self._fromsource(value) - elif name == 'defuse' and value not in DEFUSE_MODES: - raise XMLSchemaValueError(u"'defuse' attribute: {!r} is not a defuse mode.".format(value)) - elif name == 'timeout' and (not isinstance(value, int) or value <= 0): - raise XMLSchemaValueError(u"'timeout' attribute must be a positive integer: {!r}".format(value)) - elif name == 'lazy' and not isinstance(value, bool): - raise XMLSchemaValueError(u"'lazy' attribute must be a boolean: {!r}".format(value)) + elif name == '_base_url': + if value is not None and not isinstance(value, string_base_type): + msg = "invalid type {!r} for the attribute 'base_url'" + raise XMLSchemaTypeError(msg.format(type(value))) + elif name == 'defuse': + if value is not None and not isinstance(value, string_base_type): + msg = "invalid type {!r} for the attribute 'defuse'" + raise XMLSchemaTypeError(msg.format(type(value))) + elif value not in DEFUSE_MODES: + msg = "'defuse' attribute: {!r} is not a defuse mode" + raise XMLSchemaValueError(msg.format(value)) + elif name == 'timeout': + if not isinstance(value, int): + msg = "invalid type {!r} for the attribute 'timeout'" + raise XMLSchemaTypeError(msg.format(type(value))) + elif value <= 0: + raise XMLSchemaValueError("the attribute 'timeout' must be a positive integer") + elif name == '_lazy': + if not isinstance(value, bool): + msg = "invalid type 
{!r} for the attribute 'lazy'" + raise XMLSchemaValueError(msg.format(type(value))) super(XMLResource, self).__setattr__(name, value) def _fromsource(self, source): @@ -407,7 +417,7 @@ def url(self): @property def base_url(self): - """The base URL for completing relative locations.""" + """The effective base URL used for completing relative locations.""" return os.path.dirname(self._url) if self._url else self._base_url @property @@ -426,7 +436,7 @@ def document(self): @property def namespace(self): """The namespace of the XML resource.""" - return get_namespace(self._root.tag) if self._root is not None else None + return get_namespace(self._root.tag) @staticmethod def defusing(source): @@ -532,10 +542,11 @@ def open(self): return self.source elif self._url is None: raise XMLSchemaValueError("can't open, the resource has no URL associated.") + try: return urlopen(self._url, timeout=self.timeout) except URLError as err: - raise XMLSchemaURLError(reason="cannot access to resource %r: %s" % (self._url, err.reason)) + raise XMLSchemaURLError("cannot access to resource %r: %s" % (self._url, err.reason)) def seek(self, position): """ @@ -643,7 +654,15 @@ def iter(self, tag=None): resource.close() def iterfind(self, path=None, namespaces=None): - """XML resource tree iterfind selector.""" + """ + XML resource tree iterfind selector. + + :param path: an XPath expression to select nodes. If not provided the \ + iteration returns only the root node. + :param namespaces: optional mapping from namespace prefixes to URIs. If the \ + resource is lazy and an empty dictionary is provided, the namespace map is \ + updated during the iteration. 
+ """ if not self._lazy: if path is None: yield self._root @@ -659,20 +678,38 @@ def iterfind(self, path=None, namespaces=None): self.load() resource = StringIO(self._text) + if namespaces or namespaces is None: + events = ('start', 'end') + nsmap = None + else: + # Track ad update namespaces + events = ('start-ns', 'end-ns', 'start', 'end') + nsmap = [] + try: if path is None: level = 0 - for event, elem in self.iterparse(resource, events=('start', 'end')): + for event, node in self.iterparse(resource, events): if event == "start": if level == 0: self._root.clear() - self._root = elem + self._root = node level += 1 - else: + elif event == 'end': level -= 1 if level == 0: - yield elem - elem.clear() + yield node + node.clear() + elif event == 'start-ns': + nsmap.append(node) + namespaces[node[0]] = node[1] + else: + try: + del namespaces[nsmap.pop()[0]] + except KeyError: + pass + namespaces.update(nsmap) + else: selector = Selector(path, namespaces, strict=False, parser=XmlResourceXPathParser) path = path.replace(' ', '').replace('./', '') @@ -680,116 +717,158 @@ def iterfind(self, path=None, namespaces=None): select_all = '*' in path and set(path).issubset({'*', '/'}) level = 0 - for event, elem in self.iterparse(resource, events=('start', 'end')): + for event, node in self.iterparse(resource, events): if event == "start": if level == 0: self._root.clear() - self._root = elem + self._root = node level += 1 - else: + elif event == 'end': level -= 1 if level == path_level and \ - (select_all or elem in selector.select(self._root)): - yield elem - elem.clear() + (select_all or node in selector.select(self._root)): + yield node + node.clear() elif level == 0: - elem.clear() + node.clear() + elif event == 'start-ns': + nsmap.append(node) + namespaces[node[0]] = node[1] + else: + try: + del namespaces[nsmap.pop()[0]] + except KeyError: + pass + namespaces.update(nsmap) + finally: if self.source is not resource: resource.close() - def iter_location_hints(self): - 
"""Yields schema location hints from the XML tree.""" - for elem in self.iter(): - try: - locations = elem.attrib[XSI_SCHEMA_LOCATION] - except KeyError: - pass - else: - locations = locations.split() - for ns, url in zip(locations[0::2], locations[1::2]): - yield ns, url + def iter_location_hints(self, root_only=False): + """ + Yields schema location hints from the XML resource. - try: - locations = elem.attrib[XSI_NONS_SCHEMA_LOCATION] - except KeyError: - pass - else: - for url in locations.split(): - yield '', url + :param root_only: if `True` yields only the location hints declared in \ + the root element. + """ + if root_only: + for ns_url in etree_iter_location_hints(self._root): + yield ns_url + return + + if self._url is not None or hasattr(self.source, 'read'): + resource = self.open() + elif isinstance(self._text, string_base_type): + resource = StringIO(self._text) + else: + for elem in self._root.iter(): + for ns_url in etree_iter_location_hints(elem): + yield ns_url + return + + try: + for event, node in self.iterparse(resource, events=('start', 'end')): + if event == 'end': + node.clear() + else: + for ns_url in etree_iter_location_hints(node): + yield ns_url + except (ElementTree.ParseError, PyElementTree.ParseError, UnicodeEncodeError): + pass + finally: + if self.source is not resource: + resource.close() - def get_namespaces(self): + def get_namespaces(self, namespaces=None, root_only=False): """ Extracts namespaces with related prefixes from the XML resource. If a duplicate - prefix declaration is encountered then adds the namespace using a different prefix, - but only in the case if the namespace URI is not already mapped by another prefix. - - :return: A dictionary for mapping namespace prefixes to full URI. + prefix declaration is encountered and the prefix maps a different namespace, + adds the namespace using a different generated prefix. 
The empty prefix '' is + used only if it's declared at root level to avoid erroneous mapping of local + names. In other cases uses 'default' prefix as substitute. + + :param namespaces: builds the namespace map starting over the dictionary provided. + :param root_only: if `True` extracts only the namespaces declared in the root element. + :return: a dictionary for mapping namespace prefixes to full URI. """ def update_nsmap(prefix, uri): - if prefix not in nsmap and (prefix or not local_root): - nsmap[prefix] = uri - elif not any(uri == ns for ns in nsmap.values()): - if not prefix: - try: - prefix = re.search(r'(\w+)$', uri.strip()).group() - except AttributeError: - return - - while prefix in nsmap: - match = re.search(r'(\d+)$', prefix) - if match: - index = int(match.group()) + 1 - prefix = prefix[:match.span()[0]] + str(index) - else: - prefix += '2' - nsmap[prefix] = uri + if not prefix: + if '' not in nsmap: + nsmap[prefix] = uri + return + elif nsmap[''] == uri: + return + prefix = 'default' + + while prefix in nsmap: + if nsmap[prefix] == uri: + return + match = re.search(r'(\d+)$', prefix) + if match: + index = int(match.group()) + 1 + prefix = prefix[:match.span()[0]] + str(index) + else: + prefix += '0' + nsmap[prefix] = uri - local_root = self.root.tag[0] != '{' nsmap = {} + if not self.namespace: + nsmap[''] = '' + if namespaces: + nsmap.update(namespaces) if self._url is not None or hasattr(self.source, 'read'): resource = self.open() - try: - for event, node in self.iterparse(resource, events=('start-ns', 'end')): - if event == 'start-ns': - update_nsmap(*node) - else: - node.clear() - except (ElementTree.ParseError, PyElementTree.ParseError, UnicodeEncodeError): - pass - finally: - # We don't want to close the file obj if it wasn't - # originally opened by `XMLResource`. That is the concern - # of the code where the file obj came from. 
- if self.source is not resource: - resource.close() elif isinstance(self._text, string_base_type): - try: - for event, node in self.iterparse(StringIO(self._text), events=('start-ns', 'end')): - if event == 'start-ns': - update_nsmap(*node) - else: - node.clear() - except (ElementTree.ParseError, PyElementTree.ParseError, UnicodeEncodeError): - pass + resource = StringIO(self._text) else: - # Warning: can extracts namespace information only from lxml etree structures - try: - for elem in self._root.iter(): - for k, v in elem.nsmap.items(): + if hasattr(self._root, 'nsmap'): + # Can extract namespace mapping information only from lxml etree structures + if root_only: + for k, v in self._root.nsmap.items(): update_nsmap(k if k is not None else '', v) - except (AttributeError, TypeError): - pass # Not an lxml's tree or element + else: + for elem in self._root.iter(): + for k, v in elem.nsmap.items(): + update_nsmap(k if k is not None else '', v) + + if nsmap.get('') == '': + del nsmap[''] + return nsmap + + events = ('start-ns', 'start') if root_only else ('start-ns', 'end') + try: + for event, node in self.iterparse(resource, events): + if event == 'start-ns': + update_nsmap(*node) + elif event == 'end': + node.clear() + else: + break + except (ElementTree.ParseError, PyElementTree.ParseError, UnicodeEncodeError): + pass + finally: + # We don't want to close the file obj if it wasn't + # originally opened by `XMLResource`. That is the concern + # of the code where the file obj came from. + if self.source is not resource: + resource.close() + if nsmap.get('') == '': + del nsmap[''] return nsmap - def get_locations(self, locations=None): + def get_locations(self, locations=None, root_only=False): """ - Returns a list of schema location hints. The locations are normalized using the - base URL of the instance. 
The *locations* argument can be a dictionary or a list - of namespace resources, that are inserted before the schema location hints extracted - from the XML resource. + Extracts a list of normalized schema location hints from the XML resource. + The locations are normalized using the base URL of the instance. + + :param locations: a dictionary or a list of namespace resources that is \ + inserted before the schema location hints extracted from the XML resource. + :param root_only: if `True` extracts only the location hints declared in \ + the root element. + :returns: a list of couples containing namespace location hints. """ base_url = self.base_url location_hints = [] @@ -803,5 +882,7 @@ def get_locations(self, locations=None): except AttributeError: location_hints.extend([(ns, normalize_url(url, base_url)) for ns, url in locations]) - location_hints.extend([(ns, normalize_url(url, base_url)) for ns, url in self.iter_location_hints()]) + location_hints.extend([ + (ns, normalize_url(url, base_url)) for ns, url in self.iter_location_hints(root_only) + ]) return location_hints diff --git a/xmlschema/tests/test_cases/features/attributes/default_attributes.xsd b/xmlschema/tests/test_cases/features/attributes/default_attributes.xsd index f67468b1..fa06174f 100644 --- a/xmlschema/tests/test_cases/features/attributes/default_attributes.xsd +++ b/xmlschema/tests/test_cases/features/attributes/default_attributes.xsd @@ -11,7 +11,7 @@ - + diff --git a/xmlschema/tests/test_factory/validation_tests.py b/xmlschema/tests/test_factory/validation_tests.py index 651281e5..21698e17 100644 --- a/xmlschema/tests/test_factory/validation_tests.py +++ b/xmlschema/tests/test_factory/validation_tests.py @@ -125,6 +125,7 @@ def check_etree_encode(self, root, converter=None, **kwargs): if converter not in (ParkerConverter, AbderaConverter, JsonMLConverter) and not skip_strict: if debug_mode: pdb.set_trace() + breakpoint() raise AssertionError(str(err) + msg_tmpl % "encoded tree differs from 
original") elif converter is ParkerConverter and any(XSI_TYPE in e.attrib for e in root.iter()): return # can't check encode equivalence if xsi:type is provided diff --git a/xmlschema/tests/test_resources.py b/xmlschema/tests/test_resources.py index 15343931..98b27458 100644 --- a/xmlschema/tests/test_resources.py +++ b/xmlschema/tests/test_resources.py @@ -15,6 +15,7 @@ import unittest import os import platform +import warnings try: from pathlib import PureWindowsPath, PurePath @@ -24,7 +25,7 @@ from xmlschema import ( fetch_namespaces, fetch_resource, normalize_url, fetch_schema, fetch_schema_locations, - load_xml_resource, XMLResource, XMLSchemaURLError, XMLSchema + load_xml_resource, XMLResource, XMLSchemaURLError, XMLSchema, XMLSchema10, XMLSchema11 ) from xmlschema.tests import SKIP_REMOTE_TESTS, casepath from xmlschema.compat import urlopen, urlsplit, uses_relative, StringIO @@ -32,6 +33,7 @@ etree_element, py_etree_element from xmlschema.namespaces import XSD_NAMESPACE from xmlschema.helpers import is_etree_element +from xmlschema.documents import get_context def is_windows_path(path): @@ -168,13 +170,53 @@ def test_fetch_schema_locations(self): self.check_url(fetch_schema(self.vh_xml_file), self.vh_xsd_file) def test_load_xml_resource(self): - self.assertTrue(is_etree_element(load_xml_resource(self.vh_xml_file, element_only=True))) - root, text, url = load_xml_resource(self.vh_xml_file, element_only=False) + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + self.assertTrue(is_etree_element(load_xml_resource(self.vh_xml_file, element_only=True))) + root, text, url = load_xml_resource(self.vh_xml_file, element_only=False) + self.assertTrue(is_etree_element(root)) self.assertEqual(root.tag, '{http://example.com/vehicles}vehicles') self.assertTrue(text.startswith(' + + + """) + self.assertEqual(set(resource.get_namespaces().keys()), {'', 'tns', 'default'}) + + resource = XMLResource(""" + + + """) + 
self.assertEqual(set(resource.get_namespaces().keys()), {'default', 'tns'}) + + resource = XMLResource(""" + + + """) + self.assertEqual(set(resource.get_namespaces().keys()), {'default', 'tns', 'tns0'}) + def test_xml_resource_get_locations(self): resource = XMLResource(self.col_xml_file) self.check_url(resource.url, normalize_url(self.col_xml_file)) diff --git a/xmlschema/tests/test_w3c_suite.py b/xmlschema/tests/test_w3c_suite.py index 2170e46f..f695af3a 100644 --- a/xmlschema/tests/test_w3c_suite.py +++ b/xmlschema/tests/test_w3c_suite.py @@ -96,8 +96,11 @@ '../msData/schema/schZ015.xsd', # schemaLocation="" # Invalid XML tests - '../msData/additional/test93490_4.xml', # 4795: https://www.w3.org/Bugs/Public/show_bug.cgi?id=4078 - '../msData/additional/test93490_8.xml', # 4799: Idem + '../sunData/combined/xsd005/xsd005.n05.xml', # 3984: Invalid if lxml is used (xsi:type and duplicate prefix) + '../msData/additional/test93490_4.xml', # 4795: https://www.w3.org/Bugs/Public/show_bug.cgi?id=4078 + '../msData/additional/test93490_8.xml', # 4799: Idem + '../msData/datatypes/gMonth002.xml', # 8017: gMonth bogus: conflicts with other invalid schema tests + '../msData/datatypes/gMonth004.xml', # 8019: (http://www.w3.org/Bugs/Public/show_bug.cgi?id=6901) # Valid XML tests '../ibmData/instance_invalid/S3_4_2_4/s3_4_2_4ii03.xml', # defaultAttributeApply is true (false in comment) diff --git a/xmlschema/tests/validation/test_decoding.py b/xmlschema/tests/validation/test_decoding.py index 93d20503..fd29c9fc 100644 --- a/xmlschema/tests/validation/test_decoding.py +++ b/xmlschema/tests/validation/test_decoding.py @@ -480,9 +480,9 @@ def test_dict_granularity(self): def test_any_type(self): any_type = xmlschema.XMLSchema.meta_schema.types['anyType'] xml_data_1 = ElementTree.Element('dummy') - self.assertEqual(any_type.decode(xml_data_1), (None, [], [])) + self.assertIsNone(any_type.decode(xml_data_1)) xml_data_2 = ElementTree.fromstring('\n \n \n') - 
self.assertEqual(any_type.decode(xml_data_2), (None, [], [])) # Currently no decoding yet + self.assertIsNone(any_type.decode(xml_data_2)) # Currently no decoding yet def test_choice_model_decoding(self): schema = xmlschema.XMLSchema(self.casepath('issues/issue_041/issue_041.xsd')) diff --git a/xmlschema/tests/validation/test_encoding.py b/xmlschema/tests/validation/test_encoding.py index 476588db..eced9fd4 100644 --- a/xmlschema/tests/validation/test_encoding.py +++ b/xmlschema/tests/validation/test_encoding.py @@ -306,7 +306,7 @@ def test_max_occurs_sequence(self): def test_encode_unordered_content(self): schema = self.get_schema(""" - + @@ -346,11 +346,11 @@ def test_encode_unordered_content(self): ) def test_encode_unordered_content_2(self): - '''Here we test with a default converter at the schema level''' + """Here we test with a default converter at the schema level""" schema = self.get_schema(""" - + @@ -365,23 +365,17 @@ def test_encode_unordered_content_2(self): expected=u'\nabc\n10\ntrue\n', indent=0, cdata_prefix='#' ) - self.check_encode( xsd_component=schema.elements['A'], data=ordered_dict_class([('B1', 'abc'), ('B2', 10), ('#1', 'hello'), ('B3', True)]), - expected='\nhelloabc\n10\ntrue\n', - indent=0, cdata_prefix='#' - ) - self.check_encode( - xsd_component=schema.elements['A'], - data=ordered_dict_class([('B1', 'abc'), ('B2', 10), ('#1', 'hello'), ('B3', True)]), - expected=u'\nabc\n10\nhello\ntrue\n', + expected=u'\nhelloabc\n10\ntrue\n', indent=0, cdata_prefix='#' ) + # FIXME: UnorderedConverter do not work, an XMLSchemaValidationError is expected!! 
self.check_encode( xsd_component=schema.elements['A'], data=ordered_dict_class([('B1', 'abc'), ('B2', 10), ('#1', 'hello')]), - expected=XMLSchemaValidationError, indent=0, cdata_prefix='#' + expected=u'\nhelloabc\n10\n', indent=0, cdata_prefix='#' ) def test_strict_trailing_content(self): diff --git a/xmlschema/tests/validation/test_validation.py b/xmlschema/tests/validation/test_validation.py index 083bbd1c..20757ea0 100644 --- a/xmlschema/tests/validation/test_validation.py +++ b/xmlschema/tests/validation/test_validation.py @@ -68,6 +68,10 @@ def test_document_validate_api(self): vh_2_xt = ElementTree.parse(vh_2_file) self.assertRaises(XMLSchemaValidationError, xmlschema.validate, vh_2_xt, self.vh_xsd_file) + # Issue #145 + with open(self.vh_xml_file) as f: + self.assertIsNone(xmlschema.validate(f, schema=self.vh_xsd_file)) + def test_document_validate_api_lazy(self): source = xmlschema.XMLResource(self.col_xml_file, lazy=True) namespaces = source.get_namespaces() @@ -83,6 +87,23 @@ def test_document_validate_api_lazy(self): self.assertIsNone(xmlschema.validate(self.col_xml_file, lazy=True)) + def test_document_is_valid_api(self): + self.assertTrue(xmlschema.is_valid(self.vh_xml_file)) + self.assertTrue(xmlschema.is_valid(self.vh_xml_file, use_defaults=False)) + + vh_2_file = self.casepath('examples/vehicles/vehicles-2_errors.xml') + self.assertFalse(xmlschema.is_valid(vh_2_file)) + + def test_document_iter_errors_api(self): + self.assertListEqual(list(xmlschema.iter_errors(self.vh_xml_file)), []) + self.assertListEqual(list(xmlschema.iter_errors(self.vh_xml_file, use_defaults=False)), []) + + vh_2_file = self.casepath('examples/vehicles/vehicles-2_errors.xml') + errors = list(xmlschema.iter_errors(vh_2_file)) + self.assertEqual(len(errors), 2) + self.assertIsInstance(errors[0], XMLSchemaValidationError) + self.assertIsInstance(errors[1], XMLSchemaValidationError) + def test_max_depth_argument(self): schema = self.schema_class(self.col_xsd_file) 
self.assertEqual( @@ -93,7 +114,7 @@ def test_max_depth_argument(self): xmlschema.limits.MAX_XML_DEPTH = 1 with self.assertRaises(XMLSchemaValidationError): - self.assertEqual(schema.decode(self.col_xml_file)) + schema.decode(self.col_xml_file) xmlschema.limits.MAX_XML_DEPTH = 9999 self.assertEqual( @@ -109,18 +130,14 @@ class TestValidation11(TestValidation): schema_class = XMLSchema11 def test_default_attributes(self): - """ - Root Node - """ xs = self.schema_class(self.casepath('features/attributes/default_attributes.xsd')) - self.assertTrue(xs.is_valid("" - " alpha" - " beta" + self.assertTrue(xs.is_valid("\n" + " alpha\n" + " beta\n" "")) - self.assertFalse(xs.is_valid("" - " alpha" # Misses required attribute - " beta" + self.assertFalse(xs.is_valid("\n" + " alpha\n" # Misses required attribute + " beta\n" "")) diff --git a/xmlschema/tests/validators/test_schema_class.py b/xmlschema/tests/validators/test_schema_class.py index 1253a473..b7877060 100644 --- a/xmlschema/tests/validators/test_schema_class.py +++ b/xmlschema/tests/validators/test_schema_class.py @@ -76,7 +76,7 @@ def test_wrong_includes_and_imports(self): self.assertEqual(context[2].category, XMLSchemaImportWarning) self.assertTrue(str(context[0].message).startswith("Include")) self.assertTrue(str(context[1].message).startswith("Redefine")) - self.assertTrue(str(context[2].message).startswith("Namespace import")) + self.assertTrue(str(context[2].message).startswith("Import of namespace")) def test_wrong_references(self): # Wrong namespace for element type's reference diff --git a/xmlschema/validators/assertions.py b/xmlschema/validators/assertions.py index ee7ae197..9f084626 100644 --- a/xmlschema/validators/assertions.py +++ b/xmlschema/validators/assertions.py @@ -39,6 +39,7 @@ class XsdAssert(XsdComponent, ElementPathMixin): def __init__(self, elem, schema, parent, base_type): self.base_type = base_type super(XsdAssert, self).__init__(elem, schema, parent) + ElementPathMixin.__init__(self) def 
__repr__(self): return '%s(test=%r)' % (self.__class__.__name__, self.path) diff --git a/xmlschema/validators/attributes.py b/xmlschema/validators/attributes.py index cbdc1e9a..98204b3f 100644 --- a/xmlschema/validators/attributes.py +++ b/xmlschema/validators/attributes.py @@ -20,7 +20,7 @@ from ..qnames import XSD_ANNOTATION, XSD_ANY_SIMPLE_TYPE, XSD_SIMPLE_TYPE, \ XSD_ATTRIBUTE_GROUP, XSD_COMPLEX_TYPE, XSD_RESTRICTION, XSD_EXTENSION, \ XSD_SEQUENCE, XSD_ALL, XSD_CHOICE, XSD_ATTRIBUTE, XSD_ANY_ATTRIBUTE, \ - get_namespace, get_qname + XSD_ASSERT, get_namespace, get_qname from ..helpers import get_xsd_form_attribute from ..namespaces import XSI_NAMESPACE @@ -406,7 +406,7 @@ def _parse(self): if any_attribute is not None: if child.tag == XSD_ANY_ATTRIBUTE: self.parse_error("more anyAttribute declarations in the same attribute group") - else: + elif child.tag != XSD_ASSERT: self.parse_error("another declaration after anyAttribute") elif child.tag == XSD_ANY_ATTRIBUTE: @@ -632,7 +632,8 @@ def iter_decode(self, attrs, validation='lax', **kwargs): yield self.validation_error(validation, reason, attrs, **kwargs) continue else: - if xsd_attribute.use == 'prohibited': + if xsd_attribute.use == 'prohibited' and \ + (None not in self or not self[None].is_matching(name)): reason = "use of attribute %r is prohibited" % name yield self.validation_error(validation, reason, attrs, **kwargs) diff --git a/xmlschema/validators/builtins.py b/xmlschema/validators/builtins.py index 682e879d..545dc7cb 100644 --- a/xmlschema/validators/builtins.py +++ b/xmlschema/validators/builtins.py @@ -37,7 +37,7 @@ XSD_DATETIME, XSD_DATE_TIME_STAMP, XSD_ENTITY, XSD_ANY_URI, XSD_BOOLEAN, \ XSD_DURATION, XSD_DAY_TIME_DURATION, XSD_YEAR_MONTH_DURATION, XSD_BASE64_BINARY, \ XSD_HEX_BINARY, XSD_NOTATION_TYPE, XSD_ERROR, XSD_ASSERTION, XSD_SIMPLE_TYPE, \ - XSD_COMPLEX_TYPE, XSD_ANY_TYPE, XSD_ANY_ATOMIC_TYPE, XSD_ANY_SIMPLE_TYPE + XSD_ANY_TYPE, XSD_ANY_ATOMIC_TYPE, XSD_ANY_SIMPLE_TYPE from ..etree import 
etree_element from ..helpers import is_etree_element from .exceptions import XMLSchemaValidationError @@ -560,15 +560,7 @@ def xsd_builtin_types_factory(meta_schema, xsd_types, atomic_builtin_class=None) # # xs:anyType # Ref: https://www.w3.org/TR/xmlschema11-1/#builtin-ctd - any_type = meta_schema.BUILDERS.complex_type_class( - elem=etree_element(XSD_COMPLEX_TYPE, name=XSD_ANY_TYPE), - schema=meta_schema, - parent=None, - mixed=True - ) - any_type.content_type = meta_schema.create_any_content_group(any_type) - any_type.attributes = meta_schema.create_any_attribute_group(any_type) - xsd_types[XSD_ANY_TYPE] = any_type + xsd_types[XSD_ANY_TYPE] = meta_schema.create_any_type() # xs:anySimpleType # Ref: https://www.w3.org/TR/xmlschema11-2/#builtin-stds diff --git a/xmlschema/validators/complex_types.py b/xmlschema/validators/complex_types.py index edfe0b17..a33649f0 100644 --- a/xmlschema/validators/complex_types.py +++ b/xmlschema/validators/complex_types.py @@ -17,9 +17,10 @@ XSD_SIMPLE_CONTENT, XSD_ANY_SIMPLE_TYPE, XSD_OPEN_CONTENT, XSD_ASSERT, \ get_qname, local_name from ..helpers import get_xsd_derivation_attribute +from ..converters import ElementData from .exceptions import XMLSchemaValidationError, XMLSchemaDecodeError -from .xsdbase import XsdType, ValidationMixin +from .xsdbase import XsdComponent, XsdType, ValidationMixin from .assertions import XsdAssert from .attributes import XsdAttributeGroup from .simple_types import XsdSimpleType @@ -575,89 +576,44 @@ def decode(self, data, *args, **kwargs): def iter_decode(self, elem, validation='lax', **kwargs): """ - Decode an Element instance. + Decode an Element instance. A dummy element is created for the type and it's + used for decode data. Typically used for decoding with xs:anyType when an XSD + element is not available. :param elem: the Element that has to be decoded. :param validation: the validation mode. Can be 'lax', 'strict' or 'skip. :param kwargs: keyword arguments for the decoding process. 
- :return: yields a 3-tuple (simple content, complex content, attributes) containing \ - the decoded parts, eventually preceded by a sequence of validation or decoding errors. + :return: yields a decoded object, eventually preceded by a sequence of \ + validation or decoding errors. """ - if self.is_empty() and elem.text: - reason = "character data between child elements not allowed because the type's content is empty" - yield self.validation_error(validation, reason, elem, **kwargs) + xsd_element = self.schema.create_element(name=elem.tag) + xsd_element.type = self + for result in xsd_element.iter_decode(elem, validation, **kwargs): + yield result - # XSD 1.1 assertions - for assertion in self.assertions: - for error in assertion(elem, **kwargs): - yield self.validation_error(validation, error, **kwargs) - - for result in self.attributes.iter_decode(elem.attrib, validation, **kwargs): - if isinstance(result, XMLSchemaValidationError): - yield result - else: - attributes = result - break - else: - attributes = None - - if self.has_simple_content(): - if len(elem) and validation != 'skip': - reason = "a simple content element can't has child elements." - yield self.validation_error(validation, reason, elem, **kwargs) - - if elem.text is not None: - text = elem.text or kwargs.pop('default', '') - for result in self.content_type.iter_decode(text, validation, **kwargs): - if isinstance(result, XMLSchemaValidationError): - yield result - else: - yield result, None, attributes - else: - yield None, None, attributes - else: - for result in self.content_type.iter_decode(elem, validation, **kwargs): - if isinstance(result, XMLSchemaValidationError): - yield result - else: - yield None, result, attributes - - def iter_encode(self, element_data, validation='lax', **kwargs): + def iter_encode(self, obj, validation='lax', **kwargs): """ - Encode an element data instance. + Encode XML data. A dummy element is created for the type and it's used for + encode data. 
Typically used for encoding with xs:anyType when an XSD element + is not available. - :param element_data: an ElementData instance with unencoded data. + :param obj: decoded XML data. :param validation: the validation mode: can be 'lax', 'strict' or 'skip'. :param kwargs: keyword arguments for the encoding process. - :return: yields a 3-tuple (text, content, attributes) containing the encoded parts, \ - eventually preceded by a sequence of validation or decoding errors. + :return: yields an Element, eventually preceded by a sequence of \ + validation or encoding errors. """ - for result in self.attributes.iter_encode(element_data.attributes, validation, **kwargs): - if isinstance(result, XMLSchemaValidationError): - yield result - else: - attributes = result - break - else: - attributes = () + name, value = obj + xsd_element = self.schema.create_element(name=name) + xsd_element.type = self - if self.has_simple_content(): - if element_data.text is None: - yield None, element_data.content, attributes - else: - for result in self.content_type.iter_encode(element_data.text, validation, **kwargs): - if isinstance(result, XMLSchemaValidationError): - yield result - else: - yield result, element_data.content, attributes - else: - for result in self.content_type.iter_encode(element_data, validation, **kwargs): - if isinstance(result, XMLSchemaValidationError): + if isinstance(value, list): + for item in value: + for result in xsd_element.iter_encode(item, validation, **kwargs): yield result - elif result: - yield result[0], result[1], attributes - else: - yield None, None, attributes + else: + for result in xsd_element.iter_encode(value, validation, **kwargs): + yield result class Xsd11ComplexType(XsdComplexType): @@ -742,12 +698,10 @@ def _parse(self): self.default_attributes_apply = True # Add default attributes - if self.default_attributes_apply: - default_attributes = self.default_attributes - if default_attributes is not None: - if self.redefine is None and any(k in 
self.attributes for k in default_attributes): - self.parse_error("at least a default attribute is already declared in the complex type") - self.attributes.update((k, v) for k, v in default_attributes.items()) + if self.default_attributes_apply and isinstance(self.default_attributes, XsdComponent): + if self.redefine is None and any(k in self.attributes for k in self.default_attributes): + self.parse_error("at least a default attribute is already declared in the complex type") + self.attributes.update((k, v) for k, v in self.default_attributes.items()) def _parse_complex_content_extension(self, elem, base_type): # Complex content extension with simple base is forbidden XSD 1.1. diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index 636c5375..a9e336f9 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -22,9 +22,10 @@ XSD_CHOICE, XSD_ATTRIBUTE_GROUP, XSD_COMPLEX_TYPE, XSD_SIMPLE_TYPE, \ XSD_ALTERNATIVE, XSD_ELEMENT, XSD_ANY_TYPE, XSD_UNIQUE, XSD_KEY, \ XSD_KEYREF, XSI_NIL, XSI_TYPE, XSD_ERROR, get_qname -from ..etree import etree_element +from ..etree import etree_element, etree_iter_location_hints from ..helpers import get_xsd_derivation_attribute, get_xsd_form_attribute, \ ParticleCounter, strictly_equal +from ..namespaces import get_namespace from ..converters import ElementData, raw_xml_encode, XMLSchemaConverter from ..xpath import XMLSchemaProxy, ElementPathMixin @@ -79,10 +80,7 @@ class XsdElement(XsdComponent, ValidationMixin, ParticleMixin, ElementPathMixin) def __init__(self, elem, schema, parent): super(XsdElement, self).__init__(elem, schema, parent) - if self.qualified or self.ref is not None or 'targetNamespace' in elem.attrib: - self.names = (self.qualified_name,) - else: - self.names = (self.qualified_name, self.local_name) + ElementPathMixin.__init__(self) if self.type is None: raise XMLSchemaAttributeError("undefined 'type' attribute for %r." 
% self) if self.qualified is None: @@ -323,9 +321,6 @@ def _parse_substitution_group(self, substitution_group): msg = "head element %r can't be substituted by an element that has a restriction of its type" self.parse_error(msg % head_element) - if self.type.name == XSD_ANY_TYPE and 'type' not in self.elem.attrib: - self.type = self.maps.elements[substitution_group_qname].type - try: self.maps.substitution_groups[substitution_group_qname].add(self) except KeyError: @@ -456,6 +451,40 @@ def data_value(self, elem): text = self.fixed if self.fixed is not None else self.default return self.type.text_decode(text) + def check_dynamic_context(self, elem, **kwargs): + try: + locations = kwargs['locations'] + except KeyError: + return + + for ns, url in etree_iter_location_hints(elem): + if ns not in locations: + locations[ns] = url + elif locations[ns] is None: + reason = "schemaLocation declaration after namespace start" + raise XMLSchemaValidationError(self, elem, reason) + + if ns == self.target_namespace: + schema = self.schema.include_schema(url, self.schema.base_url) + else: + schema = self.schema.import_namespace(ns, url, self.schema.base_url) + + if not schema.built: + reason = "dynamic loaded schema change the assessment" + raise XMLSchemaValidationError(self, elem, reason) + + if elem.attrib: + for name in elem.attrib: + if name[0] == '{': + ns = get_namespace(name) + if ns not in locations: + locations[ns] = None + + if elem.tag[0] == '{': + ns = get_namespace(elem.tag) + if ns not in locations: + locations[ns] = None + def iter_decode(self, elem, validation='lax', **kwargs): """ Creates an iterator for decoding an Element instance. @@ -467,7 +496,13 @@ def iter_decode(self, elem, validation='lax', **kwargs): validation or decoding errors. 
""" if self.abstract: - yield self.validation_error(validation, "cannot use an abstract element for validation", elem, **kwargs) + msg = "cannot use an abstract element for validation" + yield self.validation_error(validation, msg, elem, **kwargs) + + try: + namespaces = kwargs['namespaces'] + except KeyError: + namespaces = None try: level = kwargs['level'] @@ -477,10 +512,15 @@ def iter_decode(self, elem, validation='lax', **kwargs): try: converter = kwargs['converter'] except KeyError: - converter = kwargs['converter'] = self.get_converter(**kwargs) + converter = kwargs['converter'] = self.schema.get_converter(**kwargs) else: - if not isinstance(converter, XMLSchemaConverter): - converter = kwargs['converter'] = self.get_converter(**kwargs) + if not isinstance(converter, XMLSchemaConverter) and converter is not None: + converter = kwargs['converter'] = self.schema.get_converter(**kwargs) + + try: + pass # self.check_dynamic_context(elem, **kwargs) + except XMLSchemaValidationError as err: + yield self.validation_error(validation, err, elem, **kwargs) inherited = kwargs.get('inherited') value = content = attributes = None @@ -490,7 +530,7 @@ def iter_decode(self, elem, validation='lax', **kwargs): if XSI_TYPE in elem.attrib: type_name = elem.attrib[XSI_TYPE].strip() try: - xsd_type = self.maps.get_instance_type(type_name, xsd_type, converter) + xsd_type = self.maps.get_instance_type(type_name, xsd_type, namespaces) except (KeyError, TypeError) as err: yield self.validation_error(validation, err, elem, **kwargs) @@ -523,12 +563,16 @@ def iter_decode(self, elem, validation='lax', **kwargs): yield self.validation_error(validation, reason, elem, **kwargs) elif xsi_nil in ('0', 'false'): pass + elif self.fixed is not None: + reason = "xsi:nil='true' but the element has a fixed value." + yield self.validation_error(validation, reason, elem, **kwargs) elif elem.text is not None or len(elem): reason = "xsi:nil='true' but the element is not empty." 
yield self.validation_error(validation, reason, elem, **kwargs) else: - element_data = ElementData(elem.tag, None, None, attributes) - yield converter.element_decode(element_data, self, level) + if converter is not None: + element_data = ElementData(elem.tag, None, None, attributes) + yield converter.element_decode(element_data, self, level) return if xsd_type.is_empty() and elem.text: @@ -602,8 +646,9 @@ def iter_decode(self, elem, validation='lax', **kwargs): except KeyError: value = elem.text - element_data = ElementData(elem.tag, value, content, attributes) - yield converter.element_decode(element_data, self, level) + if converter is not None: + element_data = ElementData(elem.tag, value, content, attributes) + yield converter.element_decode(element_data, self, level) if content is not None: del content @@ -611,11 +656,11 @@ def iter_decode(self, elem, validation='lax', **kwargs): if 'max_depth' in kwargs: # Don't check key references with lazy or shallow validation for constraint in filter(lambda x: not isinstance(x, XsdKeyref), self.identities.values()): - for error in constraint(elem, converter): + for error in constraint(elem, namespaces): yield self.validation_error(validation, error, elem, **kwargs) else: for constraint in self.identities.values(): - for error in constraint(elem, converter): + for error in constraint(elem, namespaces): yield self.validation_error(validation, error, elem, **kwargs) def iter_encode(self, obj, validation='lax', **kwargs): @@ -631,10 +676,10 @@ def iter_encode(self, obj, validation='lax', **kwargs): try: converter = kwargs['converter'] except KeyError: - converter = kwargs['converter'] = self.get_converter(**kwargs) + converter = kwargs['converter'] = self.schema.get_converter(**kwargs) else: if not isinstance(converter, XMLSchemaConverter): - converter = kwargs['converter'] = self.get_converter(**kwargs) + converter = kwargs['converter'] = self.schema.get_converter(**kwargs) try: level = kwargs['level'] @@ -671,6 +716,8 @@ 
def iter_encode(self, obj, validation='lax', **kwargs): errors.append("xsi:nil attribute must has a boolean value.") elif xsi_nil in ('0', 'false'): pass + elif self.fixed is not None: + errors.append("xsi:nil='true' but the element has a fixed value.") elif element_data.text is not None or element_data.content: errors.append("xsi:nil='true' but the element is not empty.") else: @@ -718,36 +765,29 @@ def iter_encode(self, obj, validation='lax', **kwargs): def is_matching(self, name, default_namespace=None, group=None): if default_namespace and name[0] != '{': qname = '{%s}%s' % (default_namespace, name) - if name in self.names or qname in self.names: + if name == self.name or qname == self.name: return True - - for xsd_element in self.iter_substitutes(): - if name in xsd_element.names or qname in xsd_element.names: - return True - - elif name in self.names: + return any(name == e.name or qname == e.name for e in self.iter_substitutes()) + elif name == self.name: return True else: - for xsd_element in self.iter_substitutes(): - if name in xsd_element.names: - return True - return False + return any(name == e.name for e in self.iter_substitutes()) def match(self, name, default_namespace=None, **kwargs): if default_namespace and name[0] != '{': qname = '{%s}%s' % (default_namespace, name) - if name in self.names or qname in self.names: + if name == self.name or qname == self.name: return self for xsd_element in self.iter_substitutes(): - if name in xsd_element.names or qname in xsd_element.names: + if name == xsd_element.name or qname == xsd_element.name: return xsd_element - elif name in self.names: + elif name == self.name: return self else: for xsd_element in self.iter_substitutes(): - if name in xsd_element.names: + if name == xsd_element.name: return xsd_element def is_restriction(self, other, check_occurs=True): @@ -758,9 +798,7 @@ def is_restriction(self, other, check_occurs=True): return False return other.is_matching(self.name, self.default_namespace) elif 
isinstance(other, XsdElement): - if self.name == other.name: - pass - elif any(n not in other.names for n in self.names): + if self.name != other.name: if other.name == self.substitution_group and \ other.min_occurs != other.max_occurs and \ self.max_occurs != 0 and not other.abstract \ @@ -777,8 +815,6 @@ def is_restriction(self, other, check_occurs=True): break else: return False - else: - return False if check_occurs and not self.has_occurs_restriction(other): return False @@ -1003,23 +1039,26 @@ def is_consistent(self, other, strict=True): xsd_element = other.match(self.name, self.default_namespace, resolve=True) return xsd_element is None or self.is_consistent(xsd_element, strict=False) - if self.name == other.name: - e = self - else: - for e in self.iter_substitutes(): - if e.name == other.name: + e1, e2 = self, other + if self.name != other.name: + for e1 in self.iter_substitutes(): + if e1.name == other.name: break else: - return True + for e2 in other.iter_substitutes(): + if e2.name == self.name: + break + else: + return True - if len(e.alternatives) != len(other.alternatives): + if len(e1.alternatives) != len(e2.alternatives): return False - elif e.type is not other.type and strict: + elif e1.type is not e2.type and strict: return False - elif e.type is not other.type or \ - not all(any(a == x for x in other.alternatives) for a in e.alternatives) or \ - not all(any(a == x for x in e.alternatives) for a in other.alternatives): - msg = "Maybe a not equivalent type table between elements %r and %r." % (self, other) + elif e1.type is not e2.type or \ + not all(any(a == x for x in e2.alternatives) for a in e1.alternatives) or \ + not all(any(a == x for x in e1.alternatives) for a in e2.alternatives): + msg = "Maybe a not equivalent type table between elements %r and %r." 
% (e1, e2) warnings.warn(msg, XMLSchemaTypeTableWarning, stacklevel=3) return True diff --git a/xmlschema/validators/globals_.py b/xmlschema/validators/globals_.py index c469b631..12c51c96 100644 --- a/xmlschema/validators/globals_.py +++ b/xmlschema/validators/globals_.py @@ -449,7 +449,7 @@ def clear(self, remove_schemas=False, only_unbuilt=False): :param only_unbuilt: removes only not built objects/schemas. """ if only_unbuilt: - not_built_schemas = {schema for schema in self.iter_schemas() if not schema.built} + not_built_schemas = {s for s in self.iter_schemas() if not s.built} if not not_built_schemas: return @@ -530,6 +530,11 @@ def build(self): if not meta_schema.built: xsd_builtin_types_factory(meta_schema, self.types) + if self is not meta_schema.maps: + # Rebuild xs:anyType for maps not owned by the meta-schema + # in order to do a correct namespace lookup for wildcards. + self.types[XSD_ANY_TYPE] = self.validator.create_any_type() + for qname in self.notations: self.lookup_notation(qname) for qname in self.attributes: @@ -603,12 +608,9 @@ def check(self, schemas=None, validation='strict'): # Checks substitution groups circularities for qname in self.substitution_groups: xsd_element = self.elements[qname] - for e in xsd_element.iter_substitutes(): - if e is xsd_element: - msg = "circularity found for substitution group with head element %r" - e.parse_error(msg.format(e), validation=validation) - elif e.abstract and e.name not in self.substitution_groups and self.xsd_version > '1.0': - self.parse_error("in XSD 1.1 an abstract element cannot be member of a substitution group") + if any(e is xsd_element for e in xsd_element.iter_substitutes()): + msg = "circularity found for substitution group with head element %r" + xsd_element.parse_error(msg.format(xsd_element), validation=validation) if validation == 'strict' and not self.built: raise XMLSchemaNotBuiltError(self, "global map has unbuilt components: %r" % self.unbuilt) @@ -617,7 +619,7 @@ def check(self, 
schemas=None, validation='strict'): for group in filter(lambda x: x.schema in schemas and x.redefine is not None, self.groups.values()): if not any(isinstance(e, XsdGroup) and e.name == group.name for e in group) \ and not group.is_restriction(group.redefine): - msg = "The redefined group is an illegal restriction of the original group." + msg = "the redefined group is an illegal restriction of the original group" group.parse_error(msg, validation=validation) # Check complex content types models restrictions @@ -630,7 +632,7 @@ def check(self, schemas=None, validation='strict'): base_type = xsd_type.base_type if base_type and base_type.name != XSD_ANY_TYPE and base_type.is_complex(): if not xsd_type.content_type.is_restriction(base_type.content_type): - msg = "The derived group is an illegal restriction of the base type group." + msg = "the derived group is an illegal restriction of the base type group" xsd_type.parse_error(msg, validation=validation) if base_type.is_complex() and not base_type.open_content and \ @@ -640,7 +642,8 @@ def check(self, schemas=None, validation='strict'): any_element=xsd_type.open_content.any_element ) if not group.is_restriction(base_type.content_type): - self.parse_error("restriction has an open content but base type has not") + msg = "restriction has an open content but base type has not" + group.parse_error(msg, validation=validation) try: xsd_type.content_type.check_model() diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py index 23ccbad7..34b90742 100644 --- a/xmlschema/validators/groups.py +++ b/xmlschema/validators/groups.py @@ -38,6 +38,10 @@ }) +def not_whitespace(s): + return s and s.strip() + + class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): """ Class for XSD 1.0 *model group* definitions. 
@@ -529,7 +533,7 @@ def check_dynamic_context(self, elem, xsd_element, model_element, converter): if xsd_type is not model_element.type and \ xsd_type.is_derived(model_element.type, derivation): reason = "usage of %r with type %s is blocked by head element" - raise XMLSchemaValidationError(self, reason % (xsd_element, derivation)) + raise XMLSchemaValidationError(self, elem, reason % (xsd_element, derivation)) if XSI_TYPE not in elem.attrib: return @@ -567,9 +571,6 @@ def iter_decode(self, elem, validation='lax', **kwargs): :return: yields a list of 3-tuples (key, decoded data, decoder), \ eventually preceded by a sequence of validation or decoding errors. """ - def not_whitespace(s): - return s is not None and s.strip() - result_list = [] cdata_index = 1 # keys for CDATA sections are positive integers @@ -595,11 +596,15 @@ def not_whitespace(s): self.validation_error('strict', reason, elem, **kwargs) try: - converter = kwargs['converter'] + namespaces = kwargs['namespaces'] except KeyError: - converter = kwargs['converter'] = self.get_converter(**kwargs) + namespaces = default_namespace = None + else: + try: + default_namespace = namespaces.get('') + except AttributeError: + default_namespace = None - default_namespace = converter.get('') model = ModelVisitor(self) errors = [] model_broken = False @@ -628,7 +633,7 @@ def not_whitespace(s): break try: - self.check_dynamic_context(child, xsd_element, model.element, converter) + self.check_dynamic_context(child, xsd_element, model.element, namespaces) except XMLSchemaValidationError as err: yield self.validation_error(validation, err, elem, **kwargs) @@ -694,13 +699,9 @@ def iter_encode(self, element_data, validation='lax', **kwargs): (key, decoded data, decoder), eventually preceded by a sequence of validation \ or encoding errors. 
""" - if not element_data.content: # or - yield element_data.content - return - level = kwargs['level'] = kwargs.get('level', 0) + 1 errors = [] - text = None + text = element_data.text children = [] try: indent = kwargs['indent'] @@ -712,15 +713,18 @@ def iter_encode(self, element_data, validation='lax', **kwargs): try: converter = kwargs['converter'] except KeyError: - converter = kwargs['converter'] = self.get_converter(**kwargs) + converter = kwargs['converter'] = self.schema.get_converter(**kwargs) default_namespace = converter.get('') model = ModelVisitor(self) - cdata_index = 0 + index = cdata_index = 0 + wrong_content_type = False if isinstance(element_data.content, dict) or kwargs.get('unordered'): content = model.iter_unordered_content(element_data.content) elif not isinstance(element_data.content, list): + if element_data.content is not None: + wrong_content_type = True content = [] elif converter.losslessly: content = element_data.content @@ -781,9 +785,8 @@ def iter_encode(self, element_data, validation='lax', **kwargs): children.append(result) if model.element is not None: - index = len(element_data.content) - cdata_index for particle, occurs, expected in model.stop(): - errors.append((index, particle, occurs, expected)) + errors.append((index - cdata_index, particle, occurs, expected)) if children: if children[-1].tail is None: @@ -791,7 +794,10 @@ def iter_encode(self, element_data, validation='lax', **kwargs): else: children[-1].tail = children[-1].tail.strip() + (padding[:-indent] or '\n') - if validation != 'skip' and (errors or not content): + cdata_not_allowed = not self.mixed and not_whitespace(text) and self and \ + (len(self) > 1 or not isinstance(self[0], XsdAnyElement)) + + if validation != 'skip' and (errors or cdata_not_allowed or wrong_content_type): attrib = {k: unicode_type(v) for k, v in element_data.attributes.items()} if validation == 'lax' and converter.etree_element_class is not etree_element: child_tags = 
[converter.etree_element(e.tag, attrib=e.attrib) for e in children] @@ -799,12 +805,18 @@ def iter_encode(self, element_data, validation='lax', **kwargs): else: elem = converter.etree_element(element_data.tag, text, children, attrib) - if not content: + if wrong_content_type: reason = "wrong content type {!r}".format(type(element_data.content)) yield self.validation_error(validation, reason, elem, **kwargs) + if cdata_not_allowed: + reason = "character data between child elements not allowed" + yield self.validation_error(validation, reason, elem, **kwargs) + for index, particle, occurs, expected in errors: - yield self.children_validation_error(validation, elem, index, particle, occurs, expected, **kwargs) + yield self.children_validation_error( + validation, elem, index, particle, occurs, expected, **kwargs + ) yield text, children diff --git a/xmlschema/validators/identities.py b/xmlschema/validators/identities.py index 65b4fd31..7f240451 100644 --- a/xmlschema/validators/identities.py +++ b/xmlschema/validators/identities.py @@ -134,7 +134,7 @@ def _parse(self): if child.tag == XSD_FIELD: self.fields.append(XsdFieldSelector(child, self.schema, self)) else: - self.parse_error("element %r not allowed here:" % child.tag, elem) + self.parse_error("%r is not allowed here" % child, elem) def _parse_identity_reference(self): super(XsdIdentity, self)._parse() @@ -181,7 +181,7 @@ def get_fields(self, context, namespaces=None, decoders=None): raise XMLSchemaValueError("%r field selects multiple values!" % field) return tuple(fields) - def iter_values(self, elem, namespaces): + def iter_values(self, elem, namespaces=None): """ Iterate field values, excluding empty values (tuples with all `None` values). 
@@ -216,7 +216,7 @@ def iter_values(self, elem, namespaces): def built(self): return self.selector is not None - def __call__(self, elem, namespaces): + def __call__(self, elem, namespaces=None): values = Counter() for v in self.iter_values(elem, namespaces): if isinstance(v, XMLSchemaValidationError): @@ -301,7 +301,7 @@ def parse_refer(self): def built(self): return self.selector is not None and isinstance(self.refer, XsdIdentity) - def get_refer_values(self, elem, namespaces): + def get_refer_values(self, elem, namespaces=None): values = set() for e in elem.iterfind(self.refer_path): for v in self.refer.iter_values(e, namespaces): @@ -309,7 +309,7 @@ def get_refer_values(self, elem, namespaces): values.add(v) return values - def __call__(self, elem, namespaces): + def __call__(self, elem, namespaces=None): if self.refer is None: return diff --git a/xmlschema/validators/models.py b/xmlschema/validators/models.py index 96cfaaf2..ea717360 100644 --- a/xmlschema/validators/models.py +++ b/xmlschema/validators/models.py @@ -431,8 +431,6 @@ def iter_group(self): """Returns an iterator for the current model group.""" if self.group.model != 'all': return iter(self.group) - elif not self.occurs: - return self.group.iter_elements() else: return (e for e in self.group.iter_elements() if not e.is_over(self.occurs[e])) diff --git a/xmlschema/validators/schema.py b/xmlschema/validators/schema.py index b2d5d347..6b5c2184 100644 --- a/xmlschema/validators/schema.py +++ b/xmlschema/validators/schema.py @@ -19,6 +19,7 @@ from collections import namedtuple, Counter from abc import ABCMeta import logging +import threading import warnings import re import sys @@ -32,7 +33,7 @@ XSD_ANNOTATION, XSD_NOTATION, XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, XSD_GROUP, \ XSD_SIMPLE_TYPE, XSD_COMPLEX_TYPE, XSD_ELEMENT, XSD_SEQUENCE, XSD_CHOICE, \ XSD_ALL, XSD_ANY, XSD_ANY_ATTRIBUTE, XSD_INCLUDE, XSD_IMPORT, XSD_REDEFINE, \ - XSD_OVERRIDE, XSD_DEFAULT_OPEN_CONTENT + XSD_OVERRIDE, 
XSD_DEFAULT_OPEN_CONTENT, XSD_ANY_TYPE, XSI_TYPE from ..helpers import get_xsd_derivation_attribute, get_xsd_form_attribute from ..namespaces import XSD_NAMESPACE, XML_NAMESPACE, XSI_NAMESPACE, VC_NAMESPACE, \ SCHEMAS_DIR, LOCATION_HINTS, NamespaceResourcesMap, NamespaceView, get_namespace @@ -95,6 +96,7 @@ def get_attribute(attr, *args): # Defining a subclass without a meta-schema (eg. XMLSchemaBase) return super(XMLSchemaMeta, mcs).__new__(mcs, name, bases, dict_) dict_['meta_schema'] = None + dict_['lock'] = threading.Lock() # Lock instance for shared meta-schemas xsd_version = dict_.get('XSD_VERSION') or get_attribute('XSD_VERSION', *bases) if xsd_version not in ('1.0', '1.1'): @@ -123,10 +125,11 @@ def get_attribute(attr, *args): meta_schema_class.__qualname__ = meta_schema_class_name globals()[meta_schema_class_name] = meta_schema_class - # Build the new meta-schema instance + # Build the shared meta-schema instance schema_location = meta_schema.url if isinstance(meta_schema, XMLSchemaBase) else meta_schema meta_schema = meta_schema_class.create_meta_schema(schema_location) dict_['meta_schema'] = meta_schema + dict_.pop('lock') return super(XMLSchemaMeta, mcs).__new__(mcs, name, bases, dict_) @@ -170,9 +173,10 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): :type timeout: int :param build: defines whether build the schema maps. Default is `True`. :type build: bool - :param use_meta: if `True` the schema processor uses the package meta-schema, otherwise the \ - meta-schema is added at the end. In the latter case the meta-schema is rebuilt if any base \ - namespace has been overridden by an import. Ignored if the argument *global_maps* is provided. + :param use_meta: if `True` the schema processor uses the package meta-schema, \ + otherwise a new meta-schema is added at the end. In the latter case the meta-schema \ + is rebuilt if any base namespace has been overridden by an import. Ignored if the \ + argument *global_maps* is provided. 
:type use_meta: bool :param loglevel: for setting a different logging level for schema initialization \ and building. For default is WARNING (30). For INFO level set it with 20, for \ @@ -204,6 +208,10 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): :vartype final_default: str :cvar default_attributes: the XSD 1.1 schema's *defaultAttributes* attribute, defaults to ``None``. :vartype default_attributes: XsdAttributeGroup + :cvar xpath_tokens: symbol table for schema bound XPath 2.0 parsers. Initially set to \ + ``None`` it's redefined at instance level with a dictionary at first use of the XPath \ + selector. The parser symbol table is extended with schema types constructors. + :vartype xpath_tokens: dict :ivar target_namespace: is the *targetNamespace* of the schema, the namespace to which \ belong the declarations/definitions of the schema. If it's empty no namespace is associated \ @@ -262,11 +270,14 @@ class XMLSchemaBase(XsdValidator, ValidationMixin, ElementPathMixin): default_attributes = None default_open_content = None override = None + xpath_tokens = None def __init__(self, source, namespace=None, validation='strict', global_maps=None, converter=None, locations=None, base_url=None, defuse='remote', timeout=300, build=True, use_meta=True, loglevel=None): super(XMLSchemaBase, self).__init__(validation) + ElementPathMixin.__init__(self) + if loglevel is not None: logger.setLevel(loglevel) elif build and global_maps is None: @@ -282,9 +293,9 @@ def __init__(self, source, namespace=None, validation='strict', global_maps=None root = self.source.root # Parse namespaces and targetNamespace - self.namespaces = {'xml': XML_NAMESPACE} # the XML namespace is implicit - self.namespaces.update(self.source.get_namespaces()) - + self.namespaces = self.source.get_namespaces( + namespaces={'xml': XML_NAMESPACE} # the XML namespace is implicitly declared + ) try: self.target_namespace = root.attrib['targetNamespace'] except KeyError: @@ -339,31 
+350,29 @@ def __init__(self, source, namespace=None, validation='strict', global_maps=None self.locations = NamespaceResourcesMap(self.source.get_locations(locations)) self.converter = self.get_converter(converter) - self.xpath_tokens = {} - # Create or set the XSD global maps instance if self.meta_schema is None: + # Meta-schema creation phase (MetaXMLSchema class) self.maps = global_maps or XsdGlobals(self) for child in filter(lambda x: x.tag == XSD_OVERRIDE, self.root): self.include_schema(child.attrib['schemaLocation'], self.base_url) - return # Meta-schemas don't need to be checked or built and don't process imports - elif global_maps is None: - if use_meta is False: - self.maps = XsdGlobals(self, validation) - elif self.target_namespace not in self.BASE_SCHEMAS: - if not self.meta_schema.maps.types: - self.meta_schema.maps.build() + return # Meta-schemas don't need to be checked and don't process imports + + with self.meta_schema.lock: + if not self.meta_schema.maps.types: + self.meta_schema.maps.build() + + # Create or set the XSD global maps instance + if global_maps is None: + if use_meta and self.target_namespace not in self.meta_schema.maps.namespaces: self.maps = self.meta_schema.maps.copy(self, validation=validation) else: - base_schemas = {k: v for k, v in self.BASE_SCHEMAS.items() if k != self.target_namespace} - meta_schema = self.create_meta_schema(base_schemas=base_schemas) - self.maps = meta_schema.maps - self.meta_schema = meta_schema + self.maps = XsdGlobals(self, validation) elif isinstance(global_maps, XsdGlobals): self.maps = global_maps else: - raise XMLSchemaTypeError("'global_maps' argument must be a %r instance." % XsdGlobals) + raise XMLSchemaTypeError("'global_maps' argument must be an %r instance." % XsdGlobals) if self.XSD_VERSION > '1.0' and any(ns == VC_NAMESPACE for ns in self.namespaces.values()): # For XSD 1.1+ apply versioning filter to schema tree. 
See the paragraph @@ -375,12 +384,7 @@ def __init__(self, source, namespace=None, validation='strict', global_maps=None del root.attrib[k] # Validate the schema document (transforming validation errors to parse errors) - if validation == 'strict': - try: - self.check_schema(root, self.namespaces) - except XMLSchemaValidationError as e: - self.parse_error(e.reason, elem=e.elem) - elif validation == 'lax': + if validation != 'skip': for e in self.meta_schema.iter_errors(root, namespaces=self.namespaces): self.parse_error(e.reason, elem=e.elem) @@ -415,16 +419,6 @@ def __init__(self, source, namespace=None, validation='strict', global_maps=None if loglevel is not None: logger.setLevel(logging.WARNING) # Restore default logging - def __getstate__(self): - state = self.__dict__.copy() - del state['xpath_tokens'] - state.pop('_xpath_parser', None) - return state - - def __setstate__(self, state): - self.__dict__.update(state) - self.xpath_tokens = {} - def __repr__(self): if self.url: basename = os.path.basename(self.url) @@ -607,6 +601,8 @@ def constraints(self): """ Old reference to identity constraints, for backward compatibility. Will be removed in v1.1.0. 
""" + warnings.warn("'constraints' property has been replaced by 'identities' " + "and will be removed in 1.1 version.", DeprecationWarning) return self.identities @classmethod @@ -653,6 +649,7 @@ def create_meta_schema(cls, source=None, base_schemas=None, global_maps=None): @classmethod def create_schema(cls, *args, **kwargs): """Creates a new schema instance of the same class of the caller.""" + warnings.warn("'create_schema()' method will be removed in 1.1 version.", DeprecationWarning) return cls(*args, **kwargs) def create_any_content_group(self, parent, any_element=None): @@ -712,6 +709,30 @@ def create_empty_attribute_group(self, parent): """ return self.BUILDERS.attribute_group_class(ATTRIBUTE_GROUP_ELEMENT, self, parent) + def create_any_type(self): + """ + Creates an xs:anyType instance related to schema instance. + """ + any_type = self.BUILDERS.complex_type_class( + elem=etree_element(XSD_COMPLEX_TYPE, name=XSD_ANY_TYPE), + schema=self, + parent=None, + mixed=True + ) + any_type.content_type = self.create_any_content_group(any_type) + any_type.attributes = self.create_any_attribute_group(any_type) + return any_type + + def create_element(self, name): + """ + Creates an xs:element instance related to schema instance. + """ + return self.BUILDERS.element_class( + elem=etree_element(XSD_ELEMENT, name=name), + schema=self, + parent=None, + ) + def copy(self): """ Makes a copy of the schema instance. The new instance has independent maps @@ -741,6 +762,9 @@ def check_schema(cls, schema, namespaces=None): :raises: :exc:`XMLSchemaValidationError` if the schema is invalid. 
""" + if not cls.meta_schema.maps.types: + cls.meta_schema.maps.build() + for error in cls.meta_schema.iter_errors(schema, namespaces=namespaces): raise error @@ -818,6 +842,27 @@ def iter_components(self, xsd_classes=None): for obj in xsd_global.iter_components(xsd_classes): yield obj + def get_converter(self, converter=None, namespaces=None, **kwargs): + """ + Returns a new converter instance. + + :param converter: can be a converter class or instance. If it's an instance \ + the new instance is copied from it and configured with the provided arguments. + :param namespaces: is an optional mapping from namespace prefix to URI. + :param kwargs: optional arguments for initialize the converter instance. + :return: a converter instance. + """ + if converter is None: + converter = getattr(self, 'converter', XMLSchemaConverter) + + if isinstance(converter, XMLSchemaConverter): + return converter.copy(namespaces=namespaces, **kwargs) + elif issubclass(converter, XMLSchemaConverter): + return converter(namespaces, **kwargs) + else: + msg = "'converter' argument must be a %r subclass or instance: %r" + raise XMLSchemaTypeError(msg % (XMLSchemaConverter, converter)) + def get_locations(self, namespace): """ Get a list of location hints for a namespace. 
@@ -829,7 +874,7 @@ def get_locations(self, namespace): def get_element(self, tag, path=None, namespaces=None): if not path: - return self.find(tag, namespaces) + return self.find(tag) elif path[-1] == '*': return self.find(path[:-1] + tag, namespaces) else: @@ -898,7 +943,7 @@ def include_schema(self, location, base_url=None): if schema_url == schema.url: break else: - schema = self.create_schema( + schema = type(self)( source=schema_url, namespace=self.target_namespace, validation=self.validation, @@ -910,9 +955,11 @@ def include_schema(self, location, base_url=None): build=False, ) - if location not in self.includes: + if schema is self: + return self + elif location not in self.includes: self.includes[location] = schema - elif self.includes[location] != schema: + elif self.includes[location] is not schema: self.includes[schema_url] = schema return schema @@ -996,8 +1043,9 @@ def _import_namespace(self, namespace, locations): break else: if import_error is not None: - self.warnings.append("Namespace import failed: %s." % str(import_error)) - warnings.warn(self.warnings[-1], XMLSchemaImportWarning, stacklevel=3) + msg = "Import of namespace {!r} from {!r} failed: {}." 
+ self.warnings.append(msg.format(namespace, locations, str(import_error))) + warnings.warn(self.warnings[-1], XMLSchemaImportWarning, stacklevel=4) self.imports[namespace] = None def import_schema(self, namespace, location, base_url=None, force=False, build=False): @@ -1027,7 +1075,7 @@ def import_schema(self, namespace, location, base_url=None, force=False, build=F self.imports[namespace] = schema return schema - schema = self.create_schema( + schema = type(self)( source=schema_url, validation=self.validation, global_maps=self.maps, @@ -1170,7 +1218,7 @@ def validate(self, source, path=None, schema_path=None, use_defaults=True, names def is_valid(self, source, path=None, schema_path=None, use_defaults=True, namespaces=None): """ Like :meth:`validate` except that do not raises an exception but returns ``True`` if - the XML document is valid, ``False`` if it's invalid. + the XML data is valid, ``False`` if it's invalid. """ error = next(self.iter_errors(source, path, schema_path, use_defaults, namespaces), None) return error is None @@ -1196,17 +1244,15 @@ def iter_errors(self, source, path=None, schema_path=None, use_defaults=True, na self.build() if not isinstance(source, XMLResource): - source = XMLResource(source=source, defuse=self.defuse, timeout=self.timeout, lazy=False) + source = XMLResource(source, defuse=self.defuse, timeout=self.timeout, lazy=False) if not schema_path and path: schema_path = path if path.startswith('/') else '/%s/%s' % (source.root.tag, path) - namespaces = {} if namespaces is None else namespaces.copy() - namespaces.update(source.get_namespaces()) - id_map = Counter() - inherited = {} - + root_only = source.is_lazy() and not namespaces + namespaces = source.get_namespaces(namespaces, root_only) namespace = source.namespace or namespaces.get('', '') + try: schema = self.maps.namespaces[namespace][0] except (KeyError, IndexError): @@ -1217,17 +1263,23 @@ def iter_errors(self, source, path=None, schema_path=None, use_defaults=True, na 
kwargs = { 'source': source, 'namespaces': namespaces, + 'converter': None, 'use_defaults': use_defaults, 'id_map': id_map, - 'inherited': inherited + 'inherited': {}, } if source.is_lazy() and path is None: + kwargs['locations'] = {} # Lazy schema load + xsd_element = schema.get_element(source.root.tag, schema_path, namespaces) if xsd_element is None: - reason = "{!r} is not an element of the schema".format(source.root) - yield schema.validation_error('lax', reason, source.root, source, namespaces) - return + if XSI_TYPE in source.root.attrib: + xsd_element = self.create_element(name=source.root.tag) + else: + reason = "{!r} is not an element of the schema".format(source.root) + yield schema.validation_error('lax', reason, source.root, source, namespaces) + return for result in xsd_element.iter_decode(source.root, max_depth=1, **kwargs): if isinstance(result, XMLSchemaValidationError): @@ -1238,13 +1290,21 @@ def iter_errors(self, source, path=None, schema_path=None, use_defaults=True, na path = '*' if not schema_path: schema_path = '/%s/*' % source.root.tag + kwargs['inherited'].clear() + + if root_only: + # Tell to iterfind to catch namespace events and update map + namespaces.clear() for elem in source.iterfind(path, namespaces): xsd_element = schema.get_element(elem.tag, schema_path, namespaces) if xsd_element is None: - reason = "{!r} is not an element of the schema".format(elem) - yield schema.validation_error('lax', reason, elem, source, namespaces) - return + if XSI_TYPE in elem.attrib: + xsd_element = self.create_element(name=elem.tag) + else: + reason = "{!r} is not an element of the schema".format(elem) + yield schema.validation_error('lax', reason, elem, source, namespaces) + return for result in xsd_element.iter_decode(elem, **kwargs): if isinstance(result, XMLSchemaValidationError): @@ -1302,21 +1362,38 @@ def iter_decode(self, source, path=None, schema_path=None, validation='lax', pro if validation not in XSD_VALIDATION_MODES: raise 
XMLSchemaValueError("validation argument can be 'strict', 'lax' or 'skip': %r" % validation) elif not isinstance(source, XMLResource): - source = XMLResource(source=source, defuse=self.defuse, timeout=self.timeout, lazy=False) + source = XMLResource(source, defuse=self.defuse, timeout=self.timeout, lazy=False) if not schema_path and path: schema_path = path if path.startswith('/') else '/%s/%s' % (source.root.tag, path) if process_namespaces: - namespaces = {} if namespaces is None else namespaces.copy() - namespaces.update(source.get_namespaces()) + root_only = source.is_lazy() and not namespaces + namespaces = source.get_namespaces(namespaces, root_only) + namespace = source.namespace or namespaces.get('', '') else: - namespaces = {} + root_only = namespaces = None + namespace = source.namespace - converter = self.get_converter(converter, namespaces, **kwargs) - id_map = Counter() - inherited = {} + try: + schema = self.maps.namespaces[namespace][0] + except (KeyError, IndexError): + reason = 'the namespace {!r} is not loaded'.format(namespace) + yield self.validation_error('lax', reason, source.root, source, namespaces) + return + id_map = Counter() + converter = self.get_converter(converter, namespaces, **kwargs) + kwargs.update( + converter=converter, + namespaces=converter.namespaces, + source=source, + use_defaults=use_defaults, + datetime_types=datetime_types, + fill_missing=fill_missing, + id_map=id_map, + inherited={}, + ) if decimal_type is not None: kwargs['decimal_type'] = decimal_type if filler is not None: @@ -1324,26 +1401,20 @@ def iter_decode(self, source, path=None, schema_path=None, validation='lax', pro if max_depth is not None: kwargs['max_depth'] = max_depth - namespace = source.namespace or namespaces.get('', '') - try: - schema = self.maps.namespaces[namespace][0] - except (KeyError, IndexError): - reason = 'the namespace {!r} is not loaded'.format(namespace) - yield self.validation_error('lax', reason, source.root, source, namespaces) - 
return + if root_only: + converter.namespaces.clear() - for elem in source.iterfind(path, namespaces): + for elem in source.iterfind(path, converter.namespaces): xsd_element = schema.get_element(elem.tag, schema_path, namespaces) if xsd_element is None: - reason = "{!r} is not an element of the schema".format(elem) - yield schema.validation_error('lax', reason, elem, source, namespaces) - return - - for obj in xsd_element.iter_decode( - elem, validation, converter=converter, source=source, - namespaces=namespaces, use_defaults=use_defaults, - datetime_types=datetime_types, fill_missing=fill_missing, - id_map=id_map, inherited=inherited, **kwargs): + if XSI_TYPE in elem.attrib: + xsd_element = self.create_element(name=elem.tag) + else: + reason = "{!r} is not an element of the schema".format(elem) + yield schema.validation_error('lax', reason, elem, source, namespaces) + return + + for obj in xsd_element.iter_decode(elem, validation, **kwargs): yield obj for k, v in id_map.items(): diff --git a/xmlschema/validators/schemas/XSD_1.1/xsd11-extra.xsd b/xmlschema/validators/schemas/XSD_1.1/xsd11-extra.xsd index ba49a10c..74dcc364 100644 --- a/xmlschema/validators/schemas/XSD_1.1/xsd11-extra.xsd +++ b/xmlschema/validators/schemas/XSD_1.1/xsd11-extra.xsd @@ -3,7 +3,8 @@ Chameleon schema for defining XSD 1.1 list type builtins and to override openContent/defaultOpenContent declarations for the xmlschema library. 
--> - + diff --git a/xmlschema/validators/simple_types.py b/xmlschema/validators/simple_types.py index e6e5a3d5..efeb1d39 100644 --- a/xmlschema/validators/simple_types.py +++ b/xmlschema/validators/simple_types.py @@ -20,10 +20,10 @@ from ..qnames import XSD_ANY_TYPE, XSD_SIMPLE_TYPE, XSD_ANY_ATOMIC_TYPE, \ XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, XSD_ANY_ATTRIBUTE, XSD_PATTERN, \ XSD_MIN_INCLUSIVE, XSD_MIN_EXCLUSIVE, XSD_MAX_INCLUSIVE, XSD_MAX_EXCLUSIVE, \ - XSD_LENGTH, XSD_MIN_LENGTH, XSD_MAX_LENGTH, XSD_WHITE_SPACE, XSD_LIST, \ - XSD_ANY_SIMPLE_TYPE, XSD_UNION, XSD_RESTRICTION, XSD_ANNOTATION, XSD_ASSERTION, \ - XSD_ID, XSD_IDREF, XSD_FRACTION_DIGITS, XSD_TOTAL_DIGITS, XSD_EXPLICIT_TIMEZONE, \ - XSD_ERROR, XSD_ASSERT, get_qname, local_name + XSD_LENGTH, XSD_MIN_LENGTH, XSD_MAX_LENGTH, XSD_WHITE_SPACE, XSD_ENUMERATION,\ + XSD_LIST, XSD_ANY_SIMPLE_TYPE, XSD_UNION, XSD_RESTRICTION, XSD_ANNOTATION, \ + XSD_ASSERTION, XSD_ID, XSD_IDREF, XSD_FRACTION_DIGITS, XSD_TOTAL_DIGITS, \ + XSD_EXPLICIT_TIMEZONE, XSD_ERROR, XSD_ASSERT, get_qname, local_name from ..helpers import get_xsd_derivation_attribute from .exceptions import XMLSchemaValidationError, XMLSchemaEncodeError, \ @@ -108,6 +108,7 @@ class XsdSimpleType(XsdType, ValidationMixin): white_space = None patterns = None validators = () + allow_empty = True def __init__(self, elem, schema, parent, name=None, facets=None): super(XsdSimpleType, self).__init__(elem, schema, parent, name) @@ -120,6 +121,9 @@ def __setattr__(self, name, value): if not isinstance(self, XsdAtomicBuiltin): self._parse_facets(value) + if self.min_length: + self.allow_empty = False + white_space = getattr(self.get_facet(XSD_WHITE_SPACE), 'value', None) if white_space is not None: self.white_space = white_space @@ -127,6 +131,12 @@ def __setattr__(self, name, value): patterns = self.get_facet(XSD_PATTERN) if patterns is not None: self.patterns = patterns + if all(p.match('') is None for p in patterns.patterns): + self.allow_empty = False + + 
enumeration = self.get_facet(XSD_ENUMERATION) + if enumeration is not None and '' not in enumeration: + self.allow_empty = False if value: if None in value: @@ -301,7 +311,7 @@ def is_empty(self): return self.max_length == 0 def is_emptiable(self): - return self.min_length is None or self.min_length == 0 + return self.allow_empty def has_simple_content(self): return True @@ -335,7 +345,7 @@ def is_derived(self, other, derivation=None): return self.base_type.is_derived(other, derivation) def is_dynamic_consistent(self, other): - return other is self.any_type or other is self.any_simple_type or self.is_derived(other) or \ + return other.name in (XSD_ANY_TYPE, XSD_ANY_SIMPLE_TYPE) or self.is_derived(other) or \ hasattr(other, 'member_types') and any(self.is_derived(mt) for mt in other.member_types) def normalize(self, text): @@ -706,6 +716,9 @@ def _parse(self): except XMLSchemaValueError as err: self.parse_error(str(err), elem) self.base_type = self.maps.types[XSD_ANY_ATOMIC_TYPE] + else: + if not base_type.allow_empty and self.min_length != 0: + self.allow_empty = False @property def admitted_facets(self): @@ -859,6 +872,8 @@ def _parse(self): self.parse_error("Cannot use xs:anyAtomicType as base type of a user-defined type") else: self.member_types = member_types + if all(not mt.allow_empty for mt in member_types): + self.allow_empty = False @property def admitted_facets(self): @@ -871,7 +886,7 @@ def is_list(self): return all(mt.is_list() for mt in self.member_types) def is_dynamic_consistent(self, other): - return other is self.any_type or other is self.any_simple_type or \ + return other.name in (XSD_ANY_TYPE, XSD_ANY_SIMPLE_TYPE) or \ other.is_derived(self) or hasattr(other, 'member_types') and \ any(mt1.is_derived(mt2) for mt1 in other.member_types for mt2 in self.member_types) diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index 82c2071b..ab793ff6 100644 --- a/xmlschema/validators/wildcards.py +++ 
b/xmlschema/validators/wildcards.py @@ -17,7 +17,7 @@ from ..exceptions import XMLSchemaValueError from ..namespaces import XSI_NAMESPACE from ..qnames import XSD_ANY, XSD_ANY_ATTRIBUTE, XSD_OPEN_CONTENT, \ - XSD_DEFAULT_OPEN_CONTENT, get_namespace + XSD_DEFAULT_OPEN_CONTENT, XSI_TYPE, get_namespace from ..xpath import XMLSchemaProxy, ElementPathMixin from .xsdbase import ValidationMixin, XsdComponent, ParticleMixin @@ -29,11 +29,6 @@ class XsdWildcard(XsdComponent, ValidationMixin): not_qname = () process_contents = 'strict' - def __init__(self, elem, schema, parent): - if parent is None: - raise XMLSchemaValueError("'parent' attribute is None but %r cannot be global!" % self) - super(XsdWildcard, self).__init__(elem, schema, parent) - def __repr__(self): if self.not_namespace: return '%s(not_namespace=%r, process_contents=%r)' % ( @@ -368,6 +363,10 @@ class XsdAnyElement(XsdWildcard, ParticleMixin, ElementPathMixin): _ADMITTED_TAGS = {XSD_ANY} precedences = () + def __init__(self, elem, schema, parent): + super(XsdAnyElement, self).__init__(elem, schema, parent) + ElementPathMixin.__init__(self) + def __repr__(self): if self.namespace: return '%s(namespace=%r, process_contents=%r, occurs=%r)' % ( @@ -427,7 +426,7 @@ def iter_substitutes(): def iter_decode(self, elem, validation='lax', **kwargs): if not self.is_matching(elem.tag): if validation != 'skip': - reason = "element %r not allowed here." 
% elem.tag + reason = "{!r} is not allowed here".format(elem) yield self.validation_error(validation, reason, elem, **kwargs) elif self.process_contents == 'skip': @@ -437,9 +436,14 @@ def iter_decode(self, elem, validation='lax', **kwargs): try: xsd_element = self.maps.lookup_element(elem.tag) except LookupError: - if validation == 'skip': - yield self.any_type.decode(elem) if len(elem) > 0 else elem.text - elif self.process_contents == 'strict': + if XSI_TYPE in elem.attrib: + xsd_element = self.schema.create_element(name=elem.tag) + for result in xsd_element.iter_decode(elem, validation, **kwargs): + yield result + elif validation == 'skip' or self.process_contents == 'lax': + for result in self.any_type.iter_decode(elem, validation, **kwargs): + yield result + else: reason = "element %r not found." % elem.tag yield self.validation_error(validation, reason, elem, **kwargs) else: @@ -459,7 +463,7 @@ def iter_encode(self, obj, validation='lax', **kwargs): if not self.is_namespace_allowed(namespace): if validation != 'skip': - reason = "element %r not allowed here." % name + reason = "element {!r} is not allowed here".format(name) yield self.validation_error(validation, reason, value, **kwargs) elif self.process_contents == 'skip': @@ -469,8 +473,9 @@ def iter_encode(self, obj, validation='lax', **kwargs): try: xsd_element = self.maps.lookup_element(name) except LookupError: - if validation == 'skip': - yield self.any_type.encode(value) + if validation == 'skip' or self.process_contents == 'lax': + for result in self.any_type.iter_encode(obj, validation, **kwargs): + yield result elif self.process_contents == 'strict': reason = "element %r not found." 
% name yield self.validation_error(validation, reason, **kwargs) @@ -659,7 +664,7 @@ def is_matching(self, name, default_namespace=None, group=None, occurs=None): elif not name or name[0] == '{': if not self.is_namespace_allowed(get_namespace(name)): return False - elif default_namespace is not None: + elif default_namespace is None: if not self.is_namespace_allowed(''): return False else: diff --git a/xmlschema/validators/xsdbase.py b/xmlschema/validators/xsdbase.py index a1af296e..7ddce704 100644 --- a/xmlschema/validators/xsdbase.py +++ b/xmlschema/validators/xsdbase.py @@ -21,7 +21,6 @@ get_qname, local_name, qname_to_prefixed from ..etree import etree_tostring from ..helpers import is_etree_element -from ..converters import XMLSchemaConverter from .exceptions import XMLSchemaParseError, XMLSchemaValidationError, \ XMLSchemaDecodeError, XMLSchemaEncodeError @@ -196,27 +195,6 @@ def _parse_xpath_default_namespace(self, elem): self.parse_error(msg % (value, ' | '.join(admitted_values)), elem) return '' - def get_converter(self, converter=None, namespaces=None, **kwargs): - """ - Returns a new converter instance. - - :param converter: can be a converter class or instance. If it's an instance \ - the new instance is copied from it and configured with the provided arguments. - :param namespaces: is an optional mapping from namespace prefix to URI. - :param kwargs: optional arguments for initialize the converter instance. - :return: a converter instance. 
- """ - if converter is None: - converter = getattr(self, 'converter', XMLSchemaConverter) - - if isinstance(converter, XMLSchemaConverter): - return converter.copy(namespaces=namespaces, **kwargs) - elif issubclass(converter, XMLSchemaConverter): - return converter(namespaces, **kwargs) - else: - msg = "'converter' argument must be a %r subclass or instance: %r" - raise XMLSchemaTypeError(msg % (XMLSchemaConverter, converter)) - class XsdComponent(XsdValidator): """ @@ -420,7 +398,7 @@ def _parse_target_namespace(self): xsd_type = self.get_parent_type() if xsd_type and xsd_type.parent is None and \ - (xsd_type.derivation != 'restriction' or xsd_type.base_type is self.any_type): + (xsd_type.derivation != 'restriction' or xsd_type.base_type.name == XSD_ANY_TYPE): self.parse_error("a declaration contained in a global complexType " "must has the same namespace as its parent schema") @@ -701,7 +679,7 @@ def is_blocked(self, xsd_element): return any(self.is_derived(xsd_type, derivation) for derivation in block) def is_dynamic_consistent(self, other): - return other is self.any_type or self.is_derived(other) or \ + return other.name == XSD_ANY_TYPE or self.is_derived(other) or \ hasattr(other, 'member_types') and any(self.is_derived(mt) for mt in other.member_types) def is_key(self): diff --git a/xmlschema/xpath.py b/xmlschema/xpath.py index 8a215da0..bf38f12e 100644 --- a/xmlschema/xpath.py +++ b/xmlschema/xpath.py @@ -14,6 +14,7 @@ from __future__ import unicode_literals from abc import abstractmethod from elementpath import XPath2Parser, XPathSchemaContext, AbstractSchemaProxy +import threading from .compat import Sequence from .qnames import XSD_SCHEMA @@ -97,14 +98,13 @@ def bind_parser(self, parser): if parser.schema is not self: parser.schema = self - try: - parser.symbol_table = self._schema.xpath_tokens[parser.__class__] - except KeyError: + if self._schema.xpath_tokens is None: parser.symbol_table = parser.__class__.symbol_table.copy() - 
self._schema.xpath_tokens[parser.__class__] = parser.symbol_table for xsd_type in self.iter_atomic_types(): parser.schema_constructor(xsd_type.name) - + self._schema.xpath_tokens = parser.symbol_table + else: + parser.symbol_table = self._schema.xpath_tokens parser.tokenizer = parser.create_tokenizer(parser.symbol_table) def get_context(self): @@ -183,11 +183,20 @@ class ElementPathMixin(Sequence): _xpath_parser = None # Internal XPath 2.0 parser, instantiated at first use. + def __init__(self): + self._xpath_lock = threading.Lock() # Lock for XPath operations + def __getstate__(self): state = self.__dict__.copy() + state.pop('_xpath_lock', None) state.pop('_xpath_parser', None) + state.pop('xpath_tokens', None) # For schema objects return state + def __setstate__(self, state): + self.__dict__.update(state) + self._xpath_lock = threading.Lock() + @abstractmethod def __iter__(self): pass @@ -223,11 +232,6 @@ def xpath_proxy(self): """Returns an XPath proxy instance bound with the schema.""" raise NotImplementedError - def _rebind_xpath_parser(self): - """Rebind XPath 2 parser with schema component.""" - if self._xpath_parser is not None: - self._xpath_parser.schema.bind_parser(self._xpath_parser) - def _get_xpath_namespaces(self, namespaces=None): """ Returns a dictionary with namespaces for XPath selection. @@ -251,12 +255,14 @@ def _xpath_parse(self, path, namespaces=None): path = ''.join(['/', XSD_SCHEMA, path]) namespaces = self._get_xpath_namespaces(namespaces) - if self._xpath_parser is None: - self._xpath_parser = XPath2Parser(namespaces, strict=False, schema=self.xpath_proxy) - else: - self._xpath_parser.namespaces = namespaces - - return self._xpath_parser.parse(path) + with self._xpath_lock: + parser = self._xpath_parser + if parser is None: + parser = XPath2Parser(namespaces, strict=False, schema=self.xpath_proxy) + self._xpath_parser = parser + else: + parser.namespaces = namespaces + return parser.parse(path) def find(self, path, namespaces=None): """