diff --git a/CHANGELOG.rst b/CHANGELOG.rst index aaebbb3a..73d76e54 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,7 +2,7 @@ CHANGELOG ********* -`v1.0.17`_ (2019-11-xx) +`v1.0.17`_ (2019-11-21) ======================= * Enhancement of validation-only speed (~15%) * Added *is_valid()* and *iter_errors()* to module API diff --git a/publiccode.yml b/publiccode.yml index 4710901b..77c2409b 100644 --- a/publiccode.yml +++ b/publiccode.yml @@ -6,7 +6,7 @@ publiccodeYmlVersion: '0.2' name: xmlschema url: 'https://github.com/sissaschool/xmlschema' landingURL: 'https://github.com/sissaschool/xmlschema' -releaseDate: '2019-11-xx' +releaseDate: '2019-11-21' softwareVersion: v1.0.17 developmentStatus: stable platforms: diff --git a/xmlschema/converters.py b/xmlschema/converters.py index 8301aec6..f8b7bfc2 100644 --- a/xmlschema/converters.py +++ b/xmlschema/converters.py @@ -326,6 +326,8 @@ def element_encode(self, obj, xsd_element, level=0): if not isinstance(obj, (self.dict, dict)): if xsd_element.type.is_simple() or xsd_element.type.has_simple_content(): return ElementData(tag, obj, None, {}) + elif xsd_element.type.mixed and not isinstance(obj, list): + return ElementData(tag, obj, None, {}) else: return ElementData(tag, None, obj, {}) diff --git a/xmlschema/tests/test_factory/validation_tests.py b/xmlschema/tests/test_factory/validation_tests.py index 651281e5..21698e17 100644 --- a/xmlschema/tests/test_factory/validation_tests.py +++ b/xmlschema/tests/test_factory/validation_tests.py @@ -125,6 +125,7 @@ def check_etree_encode(self, root, converter=None, **kwargs): if converter not in (ParkerConverter, AbderaConverter, JsonMLConverter) and not skip_strict: if debug_mode: pdb.set_trace() + breakpoint() raise AssertionError(str(err) + msg_tmpl % "encoded tree differs from original") elif converter is ParkerConverter and any(XSI_TYPE in e.attrib for e in root.iter()): return # can't check encode equivalence if xsi:type is provided diff --git a/xmlschema/tests/test_w3c_suite.py b/xmlschema/tests/test_w3c_suite.py index 55cd4e09..f695af3a 100644 --- a/xmlschema/tests/test_w3c_suite.py +++ b/xmlschema/tests/test_w3c_suite.py @@ -97,8 +97,10 @@ # Invalid XML tests '../sunData/combined/xsd005/xsd005.n05.xml', # 3984: Invalid if lxml is used (xsi:type and duplicate prefix) - '../msData/additional/test93490_4.xml', # 4795: https://www.w3.org/Bugs/Public/show_bug.cgi?id=4078 - '../msData/additional/test93490_8.xml', # 4799: Idem + '../msData/additional/test93490_4.xml', # 4795: https://www.w3.org/Bugs/Public/show_bug.cgi?id=4078 + '../msData/additional/test93490_8.xml', # 4799: Idem + '../msData/datatypes/gMonth002.xml', # 8017: gMonth bogus: conflicts with other invalid schema tests + '../msData/datatypes/gMonth004.xml', # 8019: (http://www.w3.org/Bugs/Public/show_bug.cgi?id=6901) # Valid XML tests '../ibmData/instance_invalid/S3_4_2_4/s3_4_2_4ii03.xml', # defaultAttributeApply is true (false in comment) diff --git a/xmlschema/tests/validation/test_decoding.py b/xmlschema/tests/validation/test_decoding.py index 93d20503..fd29c9fc 100644 --- a/xmlschema/tests/validation/test_decoding.py +++ b/xmlschema/tests/validation/test_decoding.py @@ -480,9 +480,9 @@ def test_dict_granularity(self): def test_any_type(self): any_type = xmlschema.XMLSchema.meta_schema.types['anyType'] xml_data_1 = ElementTree.Element('dummy') - self.assertEqual(any_type.decode(xml_data_1), (None, [], [])) + self.assertIsNone(any_type.decode(xml_data_1)) xml_data_2 = ElementTree.fromstring('\n \n \n') - self.assertEqual(any_type.decode(xml_data_2), (None, [], [])) # Currently no decoding yet + self.assertIsNone(any_type.decode(xml_data_2)) # Currently no decoding yet def test_choice_model_decoding(self): schema = xmlschema.XMLSchema(self.casepath('issues/issue_041/issue_041.xsd')) diff --git a/xmlschema/tests/validation/test_encoding.py b/xmlschema/tests/validation/test_encoding.py index ffa66235..2b3939e7 100644 --- a/xmlschema/tests/validation/test_encoding.py +++ b/xmlschema/tests/validation/test_encoding.py @@ -306,7 +306,7 @@ def test_max_occurs_sequence(self): def test_encode_unordered_content(self): schema = self.get_schema(""" - + diff --git a/xmlschema/validators/complex_types.py b/xmlschema/validators/complex_types.py index 7941f19f..a33649f0 100644 --- a/xmlschema/validators/complex_types.py +++ b/xmlschema/validators/complex_types.py @@ -17,6 +17,7 @@ XSD_SIMPLE_CONTENT, XSD_ANY_SIMPLE_TYPE, XSD_OPEN_CONTENT, XSD_ASSERT, \ get_qname, local_name from ..helpers import get_xsd_derivation_attribute +from ..converters import ElementData from .exceptions import XMLSchemaValidationError, XMLSchemaDecodeError from .xsdbase import XsdComponent, XsdType, ValidationMixin @@ -575,89 +576,44 @@ def decode(self, data, *args, **kwargs): def iter_decode(self, elem, validation='lax', **kwargs): """ - Decode an Element instance. + Decode an Element instance. A dummy element is created for the type and it's + used for decode data. Typically used for decoding with xs:anyType when an XSD + element is not available. :param elem: the Element that has to be decoded. :param validation: the validation mode. Can be 'lax', 'strict' or 'skip. :param kwargs: keyword arguments for the decoding process. - :return: yields a 3-tuple (simple content, complex content, attributes) containing \ - the decoded parts, eventually preceded by a sequence of validation or decoding errors. + :return: yields a decoded object, eventually preceded by a sequence of \ + validation or decoding errors. """ - if self.is_empty() and elem.text: - reason = "character data between child elements not allowed because the type's content is empty" - yield self.validation_error(validation, reason, elem, **kwargs) + xsd_element = self.schema.create_element(name=elem.tag) + xsd_element.type = self + for result in xsd_element.iter_decode(elem, validation, **kwargs): + yield result - # XSD 1.1 assertions - for assertion in self.assertions: - for error in assertion(elem, **kwargs): - yield self.validation_error(validation, error, **kwargs) - - for result in self.attributes.iter_decode(elem.attrib, validation, **kwargs): - if isinstance(result, XMLSchemaValidationError): - yield result - else: - attributes = result - break - else: - attributes = None - - if self.has_simple_content(): - if len(elem) and validation != 'skip': - reason = "a simple content element can't has child elements." - yield self.validation_error(validation, reason, elem, **kwargs) - - if elem.text is not None: - text = elem.text or kwargs.pop('default', '') - for result in self.content_type.iter_decode(text, validation, **kwargs): - if isinstance(result, XMLSchemaValidationError): - yield result - else: - yield result, None, attributes - else: - yield None, None, attributes - else: - for result in self.content_type.iter_decode(elem, validation, **kwargs): - if isinstance(result, XMLSchemaValidationError): - yield result - else: - yield None, result, attributes - - def iter_encode(self, element_data, validation='lax', **kwargs): + def iter_encode(self, obj, validation='lax', **kwargs): """ - Encode an element data instance. + Encode XML data. A dummy element is created for the type and it's used for + encode data. Typically used for encoding with xs:anyType when an XSD element + is not available. - :param element_data: an ElementData instance with unencoded data. + :param obj: decoded XML data. :param validation: the validation mode: can be 'lax', 'strict' or 'skip'. :param kwargs: keyword arguments for the encoding process. - :return: yields a 3-tuple (text, content, attributes) containing the encoded parts, \ - eventually preceded by a sequence of validation or decoding errors. + :return: yields an Element, eventually preceded by a sequence of \ + validation or encoding errors. """ - for result in self.attributes.iter_encode(element_data.attributes, validation, **kwargs): - if isinstance(result, XMLSchemaValidationError): - yield result - else: - attributes = result - break - else: - attributes = () + name, value = obj + xsd_element = self.schema.create_element(name=name) + xsd_element.type = self - if self.has_simple_content(): - if element_data.text is None: - yield None, element_data.content, attributes - else: - for result in self.content_type.iter_encode(element_data.text, validation, **kwargs): - if isinstance(result, XMLSchemaValidationError): - yield result - else: - yield result, element_data.content, attributes - else: - for result in self.content_type.iter_encode(element_data, validation, **kwargs): - if isinstance(result, XMLSchemaValidationError): + if isinstance(value, list): + for item in value: + for result in xsd_element.iter_encode(item, validation, **kwargs): yield result - elif result: - yield result[0], result[1], attributes - else: - yield None, None, attributes + else: + for result in xsd_element.iter_encode(value, validation, **kwargs): + yield result class Xsd11ComplexType(XsdComplexType): diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py index 58c3df15..178aad29 100644 --- a/xmlschema/validators/elements.py +++ b/xmlschema/validators/elements.py @@ -530,8 +530,7 @@ def iter_decode(self, elem, validation='lax', **kwargs): if XSI_TYPE in elem.attrib: type_name = elem.attrib[XSI_TYPE].strip() try: - nsmap = getattr(elem, 'nsmap', namespaces) - xsd_type = self.maps.get_instance_type(type_name, xsd_type, namespaces=nsmap) + xsd_type = self.maps.get_instance_type(type_name, xsd_type, namespaces) except (KeyError, TypeError) as err: yield self.validation_error(validation, err, elem, **kwargs) diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py index c68a5335..5c2d4a43 100644 --- a/xmlschema/validators/groups.py +++ b/xmlschema/validators/groups.py @@ -38,6 +38,10 @@ }) +def not_whitespace(s): + return s and s.strip() + + class XsdGroup(XsdComponent, ModelGroup, ValidationMixin): """ Class for XSD 1.0 *model group* definitions. @@ -567,9 +571,6 @@ def iter_decode(self, elem, validation='lax', **kwargs): :return: yields a list of 3-tuples (key, decoded data, decoder), \ eventually preceded by a sequence of validation or decoding errors. """ - def not_whitespace(s): - return s is not None and s.strip() - result_list = [] cdata_index = 1 # keys for CDATA sections are positive integers @@ -698,13 +699,9 @@ def iter_encode(self, element_data, validation='lax', **kwargs): (key, decoded data, decoder), eventually preceded by a sequence of validation \ or encoding errors. """ - if not element_data.content: # or - yield element_data.content - return - level = kwargs['level'] = kwargs.get('level', 0) + 1 errors = [] - text = None + text = element_data.text children = [] try: indent = kwargs['indent'] @@ -720,11 +717,14 @@ def iter_encode(self, element_data, validation='lax', **kwargs): default_namespace = converter.get('') model = ModelVisitor(self) - cdata_index = 0 + index = cdata_index = 0 + wrong_content_type = False if isinstance(element_data.content, dict) or kwargs.get('unordered'): content = model.iter_unordered_content(element_data.content) elif not isinstance(element_data.content, list): + if element_data.content is not None: + wrong_content_type = True content = [] elif converter.losslessly: content = element_data.content @@ -785,9 +785,8 @@ def iter_encode(self, element_data, validation='lax', **kwargs): children.append(result) if model.element is not None: - index = len(element_data.content) - cdata_index for particle, occurs, expected in model.stop(): - errors.append((index, particle, occurs, expected)) + errors.append((index - cdata_index, particle, occurs, expected)) if children: if children[-1].tail is None: @@ -795,7 +794,10 @@ def iter_encode(self, element_data, validation='lax', **kwargs): else: children[-1].tail = children[-1].tail.strip() + (padding[:-indent] or '\n') - if validation != 'skip' and (errors or not content): + cdata_not_allowed = not self.mixed and not_whitespace(text) and self and \ + (len(self) > 1 or not isinstance(self[0], XsdAnyElement)) + + if validation != 'skip' and (errors or cdata_not_allowed or wrong_content_type): attrib = {k: unicode_type(v) for k, v in element_data.attributes.items()} if validation == 'lax' and converter.etree_element_class is not etree_element: child_tags = [converter.etree_element(e.tag, attrib=e.attrib) for e in children] @@ -803,12 +805,18 @@ def iter_encode(self, element_data, validation='lax', **kwargs): else: elem = converter.etree_element(element_data.tag, text, children, attrib) - if not content: + if wrong_content_type: reason = "wrong content type {!r}".format(type(element_data.content)) yield self.validation_error(validation, reason, elem, **kwargs) + if cdata_not_allowed: + reason = "character data between child elements not allowed" + yield self.validation_error(validation, reason, elem, **kwargs) + for index, particle, occurs, expected in errors: - yield self.children_validation_error(validation, elem, index, particle, occurs, expected, **kwargs) + yield self.children_validation_error( + validation, elem, index, particle, occurs, expected, **kwargs + ) yield text, children diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py index 0760c57b..ab793ff6 100644 --- a/xmlschema/validators/wildcards.py +++ b/xmlschema/validators/wildcards.py @@ -440,9 +440,10 @@ def iter_decode(self, elem, validation='lax', **kwargs): xsd_element = self.schema.create_element(name=elem.tag) for result in xsd_element.iter_decode(elem, validation, **kwargs): yield result - elif validation == 'skip': - yield self.any_type.decode(elem) if len(elem) > 0 else elem.text - elif self.process_contents == 'strict': + elif validation == 'skip' or self.process_contents == 'lax': + for result in self.any_type.iter_decode(elem, validation, **kwargs): + yield result + else: reason = "element %r not found." % elem.tag yield self.validation_error(validation, reason, elem, **kwargs) else: @@ -472,8 +473,9 @@ def iter_encode(self, obj, validation='lax', **kwargs): try: xsd_element = self.maps.lookup_element(name) except LookupError: - if validation == 'skip': - yield self.any_type.encode(value) + if validation == 'skip' or self.process_contents == 'lax': + for result in self.any_type.iter_encode(obj, validation, **kwargs): + yield result elif self.process_contents == 'strict': reason = "element %r not found." % name yield self.validation_error(validation, reason, **kwargs)