diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index aaebbb3a..73d76e54 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -2,7 +2,7 @@
CHANGELOG
*********
-`v1.0.17`_ (2019-11-xx)
+`v1.0.17`_ (2019-11-21)
=======================
* Enhancement of validation-only speed (~15%)
* Added *is_valid()* and *iter_errors()* to module API
diff --git a/publiccode.yml b/publiccode.yml
index 4710901b..77c2409b 100644
--- a/publiccode.yml
+++ b/publiccode.yml
@@ -6,7 +6,7 @@ publiccodeYmlVersion: '0.2'
name: xmlschema
url: 'https://github.com/sissaschool/xmlschema'
landingURL: 'https://github.com/sissaschool/xmlschema'
-releaseDate: '2019-11-xx'
+releaseDate: '2019-11-21'
softwareVersion: v1.0.17
developmentStatus: stable
platforms:
diff --git a/xmlschema/converters.py b/xmlschema/converters.py
index 8301aec6..f8b7bfc2 100644
--- a/xmlschema/converters.py
+++ b/xmlschema/converters.py
@@ -326,6 +326,8 @@ def element_encode(self, obj, xsd_element, level=0):
if not isinstance(obj, (self.dict, dict)):
if xsd_element.type.is_simple() or xsd_element.type.has_simple_content():
return ElementData(tag, obj, None, {})
+ elif xsd_element.type.mixed and not isinstance(obj, list):
+ return ElementData(tag, obj, None, {})
else:
return ElementData(tag, None, obj, {})
diff --git a/xmlschema/tests/test_factory/validation_tests.py b/xmlschema/tests/test_factory/validation_tests.py
index 651281e5..21698e17 100644
--- a/xmlschema/tests/test_factory/validation_tests.py
+++ b/xmlschema/tests/test_factory/validation_tests.py
@@ -125,6 +125,7 @@ def check_etree_encode(self, root, converter=None, **kwargs):
if converter not in (ParkerConverter, AbderaConverter, JsonMLConverter) and not skip_strict:
if debug_mode:
pdb.set_trace()
+ breakpoint()
raise AssertionError(str(err) + msg_tmpl % "encoded tree differs from original")
elif converter is ParkerConverter and any(XSI_TYPE in e.attrib for e in root.iter()):
return # can't check encode equivalence if xsi:type is provided
diff --git a/xmlschema/tests/test_w3c_suite.py b/xmlschema/tests/test_w3c_suite.py
index 55cd4e09..f695af3a 100644
--- a/xmlschema/tests/test_w3c_suite.py
+++ b/xmlschema/tests/test_w3c_suite.py
@@ -97,8 +97,10 @@
# Invalid XML tests
'../sunData/combined/xsd005/xsd005.n05.xml', # 3984: Invalid if lxml is used (xsi:type and duplicate prefix)
- '../msData/additional/test93490_4.xml', # 4795: https://www.w3.org/Bugs/Public/show_bug.cgi?id=4078
- '../msData/additional/test93490_8.xml', # 4799: Idem
+ '../msData/additional/test93490_4.xml', # 4795: https://www.w3.org/Bugs/Public/show_bug.cgi?id=4078
+ '../msData/additional/test93490_8.xml', # 4799: Idem
+ '../msData/datatypes/gMonth002.xml', # 8017: gMonth bogus: conflicts with other invalid schema tests
+ '../msData/datatypes/gMonth004.xml', # 8019: (http://www.w3.org/Bugs/Public/show_bug.cgi?id=6901)
# Valid XML tests
'../ibmData/instance_invalid/S3_4_2_4/s3_4_2_4ii03.xml', # defaultAttributeApply is true (false in comment)
diff --git a/xmlschema/tests/validation/test_decoding.py b/xmlschema/tests/validation/test_decoding.py
index 93d20503..fd29c9fc 100644
--- a/xmlschema/tests/validation/test_decoding.py
+++ b/xmlschema/tests/validation/test_decoding.py
@@ -480,9 +480,9 @@ def test_dict_granularity(self):
def test_any_type(self):
any_type = xmlschema.XMLSchema.meta_schema.types['anyType']
xml_data_1 = ElementTree.Element('dummy')
- self.assertEqual(any_type.decode(xml_data_1), (None, [], []))
+ self.assertIsNone(any_type.decode(xml_data_1))
xml_data_2 = ElementTree.fromstring('\n \n \n')
- self.assertEqual(any_type.decode(xml_data_2), (None, [], [])) # Currently no decoding yet
+ self.assertIsNone(any_type.decode(xml_data_2)) # Currently no decoding yet
def test_choice_model_decoding(self):
schema = xmlschema.XMLSchema(self.casepath('issues/issue_041/issue_041.xsd'))
diff --git a/xmlschema/tests/validation/test_encoding.py b/xmlschema/tests/validation/test_encoding.py
index ffa66235..2b3939e7 100644
--- a/xmlschema/tests/validation/test_encoding.py
+++ b/xmlschema/tests/validation/test_encoding.py
@@ -306,7 +306,7 @@ def test_max_occurs_sequence(self):
def test_encode_unordered_content(self):
schema = self.get_schema("""
-
+
diff --git a/xmlschema/validators/complex_types.py b/xmlschema/validators/complex_types.py
index 7941f19f..a33649f0 100644
--- a/xmlschema/validators/complex_types.py
+++ b/xmlschema/validators/complex_types.py
@@ -17,6 +17,7 @@
XSD_SIMPLE_CONTENT, XSD_ANY_SIMPLE_TYPE, XSD_OPEN_CONTENT, XSD_ASSERT, \
get_qname, local_name
from ..helpers import get_xsd_derivation_attribute
+from ..converters import ElementData
from .exceptions import XMLSchemaValidationError, XMLSchemaDecodeError
from .xsdbase import XsdComponent, XsdType, ValidationMixin
@@ -575,89 +576,44 @@ def decode(self, data, *args, **kwargs):
def iter_decode(self, elem, validation='lax', **kwargs):
"""
- Decode an Element instance.
+ Decode an Element instance. A dummy element is created for the type and is
+ used to decode the data. Typically used for decoding with xs:anyType when an
+ XSD element is not available.
:param elem: the Element that has to be decoded.
:param validation: the validation mode. Can be 'lax', 'strict' or 'skip.
:param kwargs: keyword arguments for the decoding process.
- :return: yields a 3-tuple (simple content, complex content, attributes) containing \
- the decoded parts, eventually preceded by a sequence of validation or decoding errors.
+ :return: yields a decoded object, possibly preceded by a sequence of \
+ validation or decoding errors.
"""
- if self.is_empty() and elem.text:
- reason = "character data between child elements not allowed because the type's content is empty"
- yield self.validation_error(validation, reason, elem, **kwargs)
+ xsd_element = self.schema.create_element(name=elem.tag)
+ xsd_element.type = self
+ for result in xsd_element.iter_decode(elem, validation, **kwargs):
+ yield result
- # XSD 1.1 assertions
- for assertion in self.assertions:
- for error in assertion(elem, **kwargs):
- yield self.validation_error(validation, error, **kwargs)
-
- for result in self.attributes.iter_decode(elem.attrib, validation, **kwargs):
- if isinstance(result, XMLSchemaValidationError):
- yield result
- else:
- attributes = result
- break
- else:
- attributes = None
-
- if self.has_simple_content():
- if len(elem) and validation != 'skip':
- reason = "a simple content element can't has child elements."
- yield self.validation_error(validation, reason, elem, **kwargs)
-
- if elem.text is not None:
- text = elem.text or kwargs.pop('default', '')
- for result in self.content_type.iter_decode(text, validation, **kwargs):
- if isinstance(result, XMLSchemaValidationError):
- yield result
- else:
- yield result, None, attributes
- else:
- yield None, None, attributes
- else:
- for result in self.content_type.iter_decode(elem, validation, **kwargs):
- if isinstance(result, XMLSchemaValidationError):
- yield result
- else:
- yield None, result, attributes
-
- def iter_encode(self, element_data, validation='lax', **kwargs):
+ def iter_encode(self, obj, validation='lax', **kwargs):
"""
- Encode an element data instance.
+ Encode XML data. A dummy element is created for the type and is used to
+ encode the data. Typically used for encoding with xs:anyType when an XSD
+ element is not available.
- :param element_data: an ElementData instance with unencoded data.
+ :param obj: decoded XML data.
:param validation: the validation mode: can be 'lax', 'strict' or 'skip'.
:param kwargs: keyword arguments for the encoding process.
- :return: yields a 3-tuple (text, content, attributes) containing the encoded parts, \
- eventually preceded by a sequence of validation or decoding errors.
+ :return: yields an Element, possibly preceded by a sequence of \
+ validation or encoding errors.
"""
- for result in self.attributes.iter_encode(element_data.attributes, validation, **kwargs):
- if isinstance(result, XMLSchemaValidationError):
- yield result
- else:
- attributes = result
- break
- else:
- attributes = ()
+ name, value = obj
+ xsd_element = self.schema.create_element(name=name)
+ xsd_element.type = self
- if self.has_simple_content():
- if element_data.text is None:
- yield None, element_data.content, attributes
- else:
- for result in self.content_type.iter_encode(element_data.text, validation, **kwargs):
- if isinstance(result, XMLSchemaValidationError):
- yield result
- else:
- yield result, element_data.content, attributes
- else:
- for result in self.content_type.iter_encode(element_data, validation, **kwargs):
- if isinstance(result, XMLSchemaValidationError):
+ if isinstance(value, list):
+ for item in value:
+ for result in xsd_element.iter_encode(item, validation, **kwargs):
yield result
- elif result:
- yield result[0], result[1], attributes
- else:
- yield None, None, attributes
+ else:
+ for result in xsd_element.iter_encode(value, validation, **kwargs):
+ yield result
class Xsd11ComplexType(XsdComplexType):
diff --git a/xmlschema/validators/elements.py b/xmlschema/validators/elements.py
index 58c3df15..178aad29 100644
--- a/xmlschema/validators/elements.py
+++ b/xmlschema/validators/elements.py
@@ -530,8 +530,7 @@ def iter_decode(self, elem, validation='lax', **kwargs):
if XSI_TYPE in elem.attrib:
type_name = elem.attrib[XSI_TYPE].strip()
try:
- nsmap = getattr(elem, 'nsmap', namespaces)
- xsd_type = self.maps.get_instance_type(type_name, xsd_type, namespaces=nsmap)
+ xsd_type = self.maps.get_instance_type(type_name, xsd_type, namespaces)
except (KeyError, TypeError) as err:
yield self.validation_error(validation, err, elem, **kwargs)
diff --git a/xmlschema/validators/groups.py b/xmlschema/validators/groups.py
index c68a5335..5c2d4a43 100644
--- a/xmlschema/validators/groups.py
+++ b/xmlschema/validators/groups.py
@@ -38,6 +38,10 @@
})
+def not_whitespace(s):
+ return s and s.strip()
+
+
class XsdGroup(XsdComponent, ModelGroup, ValidationMixin):
"""
Class for XSD 1.0 *model group* definitions.
@@ -567,9 +571,6 @@ def iter_decode(self, elem, validation='lax', **kwargs):
:return: yields a list of 3-tuples (key, decoded data, decoder), \
eventually preceded by a sequence of validation or decoding errors.
"""
- def not_whitespace(s):
- return s is not None and s.strip()
-
result_list = []
cdata_index = 1 # keys for CDATA sections are positive integers
@@ -698,13 +699,9 @@ def iter_encode(self, element_data, validation='lax', **kwargs):
(key, decoded data, decoder), eventually preceded by a sequence of validation \
or encoding errors.
"""
- if not element_data.content: # or
- yield element_data.content
- return
-
level = kwargs['level'] = kwargs.get('level', 0) + 1
errors = []
- text = None
+ text = element_data.text
children = []
try:
indent = kwargs['indent']
@@ -720,11 +717,14 @@ def iter_encode(self, element_data, validation='lax', **kwargs):
default_namespace = converter.get('')
model = ModelVisitor(self)
- cdata_index = 0
+ index = cdata_index = 0
+ wrong_content_type = False
if isinstance(element_data.content, dict) or kwargs.get('unordered'):
content = model.iter_unordered_content(element_data.content)
elif not isinstance(element_data.content, list):
+ if element_data.content is not None:
+ wrong_content_type = True
content = []
elif converter.losslessly:
content = element_data.content
@@ -785,9 +785,8 @@ def iter_encode(self, element_data, validation='lax', **kwargs):
children.append(result)
if model.element is not None:
- index = len(element_data.content) - cdata_index
for particle, occurs, expected in model.stop():
- errors.append((index, particle, occurs, expected))
+ errors.append((index - cdata_index, particle, occurs, expected))
if children:
if children[-1].tail is None:
@@ -795,7 +794,10 @@ def iter_encode(self, element_data, validation='lax', **kwargs):
else:
children[-1].tail = children[-1].tail.strip() + (padding[:-indent] or '\n')
- if validation != 'skip' and (errors or not content):
+ cdata_not_allowed = not self.mixed and not_whitespace(text) and self and \
+ (len(self) > 1 or not isinstance(self[0], XsdAnyElement))
+
+ if validation != 'skip' and (errors or cdata_not_allowed or wrong_content_type):
attrib = {k: unicode_type(v) for k, v in element_data.attributes.items()}
if validation == 'lax' and converter.etree_element_class is not etree_element:
child_tags = [converter.etree_element(e.tag, attrib=e.attrib) for e in children]
@@ -803,12 +805,18 @@ def iter_encode(self, element_data, validation='lax', **kwargs):
else:
elem = converter.etree_element(element_data.tag, text, children, attrib)
- if not content:
+ if wrong_content_type:
reason = "wrong content type {!r}".format(type(element_data.content))
yield self.validation_error(validation, reason, elem, **kwargs)
+ if cdata_not_allowed:
+ reason = "character data between child elements not allowed"
+ yield self.validation_error(validation, reason, elem, **kwargs)
+
for index, particle, occurs, expected in errors:
- yield self.children_validation_error(validation, elem, index, particle, occurs, expected, **kwargs)
+ yield self.children_validation_error(
+ validation, elem, index, particle, occurs, expected, **kwargs
+ )
yield text, children
diff --git a/xmlschema/validators/wildcards.py b/xmlschema/validators/wildcards.py
index 0760c57b..ab793ff6 100644
--- a/xmlschema/validators/wildcards.py
+++ b/xmlschema/validators/wildcards.py
@@ -440,9 +440,10 @@ def iter_decode(self, elem, validation='lax', **kwargs):
xsd_element = self.schema.create_element(name=elem.tag)
for result in xsd_element.iter_decode(elem, validation, **kwargs):
yield result
- elif validation == 'skip':
- yield self.any_type.decode(elem) if len(elem) > 0 else elem.text
- elif self.process_contents == 'strict':
+ elif validation == 'skip' or self.process_contents == 'lax':
+ for result in self.any_type.iter_decode(elem, validation, **kwargs):
+ yield result
+ else:
reason = "element %r not found." % elem.tag
yield self.validation_error(validation, reason, elem, **kwargs)
else:
@@ -472,8 +473,9 @@ def iter_encode(self, obj, validation='lax', **kwargs):
try:
xsd_element = self.maps.lookup_element(name)
except LookupError:
- if validation == 'skip':
- yield self.any_type.encode(value)
+ if validation == 'skip' or self.process_contents == 'lax':
+ for result in self.any_type.iter_encode(obj, validation, **kwargs):
+ yield result
elif self.process_contents == 'strict':
reason = "element %r not found." % name
yield self.validation_error(validation, reason, **kwargs)