From 12b71ba5ee7f75c281c73ebe232a3d26b929e4f3 Mon Sep 17 00:00:00 2001 From: Davide Brunato Date: Sat, 30 Mar 2019 09:16:51 +0100 Subject: [PATCH] Fix spacing errors - Added is_spaced() to TDOP Parser base class - Remove '/' from next_is_path_step_token() methods - Increase release and update API documentation --- CHANGELOG.rst | 6 ++++++ doc/conf.py | 2 +- doc/pratt_api.rst | 8 ++++++++ elementpath/__init__.py | 2 +- elementpath/tdop_parser.py | 29 +++++++++++++++++++++++++---- elementpath/xpath1_parser.py | 4 +++- elementpath/xpath2_parser.py | 2 +- setup.py | 2 +- tests/test_xpath1_parser.py | 7 ++++++- 9 files changed, 52 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3957662e..1e3ccbdc 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,11 @@ CHANGELOG ********* +`v1.1.7`_ (TBD) +=============== +* Added Parser.is_spaced() method for checking if the current token has extra spaces before or after +* Fixes for '/' and ':' tokens + `v1.1.6`_ (2019-03-28) ====================== * Fixes for XSD datatypes @@ -116,3 +121,4 @@ CHANGELOG .. _v1.1.4: https://github.com/brunato/elementpath/compare/v1.1.3...v1.1.4 .. _v1.1.5: https://github.com/brunato/elementpath/compare/v1.1.4...v1.1.5 .. _v1.1.6: https://github.com/brunato/elementpath/compare/v1.1.5...v1.1.6 +.. _v1.1.7: https://github.com/brunato/elementpath/compare/v1.1.6...v1.1.7 diff --git a/doc/conf.py b/doc/conf.py index bc02bf01..954be9aa 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -31,7 +31,7 @@ # The short X.Y version version = '' # The full version, including alpha/beta/rc tags -release = '1.1.6' +release = '1.1.7' # -- General configuration --------------------------------------------------- diff --git a/doc/pratt_api.rst b/doc/pratt_api.rst index 6f1e8035..e2182605 100644 --- a/doc/pratt_api.rst +++ b/doc/pratt_api.rst @@ -38,6 +38,8 @@ Parser base class .. autoclass:: elementpath.Parser + .. autoattribute:: position + Parsing methods: ..
automethod:: build_tokenizer @@ -46,6 +48,12 @@ Parser base class .. automethod:: raw_advance .. automethod:: expression + Helper methods for checking parser status: + + .. automethod:: is_source_start + .. automethod:: is_line_start + .. automethod:: is_spaced + Helper methods for building effective parser classes: .. automethod:: register diff --git a/elementpath/__init__.py b/elementpath/__init__.py index 4458c458..2956a912 100644 --- a/elementpath/__init__.py +++ b/elementpath/__init__.py @@ -8,7 +8,7 @@ # # @author Davide Brunato # -__version__ = '1.1.6' +__version__ = '1.1.7' __author__ = "Davide Brunato" __contact__ = "brunato@sissa.it" __copyright__ = "Copyright 2018-2019, SISSA" diff --git a/elementpath/tdop_parser.py b/elementpath/tdop_parser.py index 3b404045..904bd129 100644 --- a/elementpath/tdop_parser.py +++ b/elementpath/tdop_parser.py @@ -287,12 +287,15 @@ def wrong_syntax(self, message=None): symbol = self.value if SPECIAL_SYMBOL_PATTERN.match(self.symbol) is not None else self.symbol line_column = 'line %d, column %d' % self.parser.position token = self.parser.token - if token is not None and symbol != token.symbol: - msg = "unexpected symbol %r after %s at %s." % (symbol, token, line_column) + msg = "symbol %r after %s at %s" % (symbol, token, line_column) + else: + msg = "symbol %r at %s" % (symbol, line_column) + + if message: + raise ElementPathSyntaxError('%s: %s' % (msg, message), self) else: - msg = "unexpected symbol %r at %s." % (symbol, line_column) - raise ElementPathSyntaxError(msg + ' ' + message if message else msg, self) + raise ElementPathSyntaxError('unexpected %s.' 
% msg, self) def wrong_value(self, message='unknown error'): raise ElementPathValueError(message, self) @@ -555,6 +558,24 @@ def is_line_start(self): line_start = self.source[0:token_index].rindex('\n') + 1 return not bool(self.source[line_start:token_index].strip()) + def is_spaced(self, before=True, after=True): + """ + Returns `True` if the source has an extra space (whitespace, tab or newline) immediately + before or after the current position of the parser. + + :param before: if `True` considers also the extra spaces before the current token symbol. + :param after: if `True` considers also the extra spaces after the current token symbol. + """ + if self.match is None: + return False + start, end = self.match.span() + if before and start > 0 and self.source[start - 1] in ' \t\n': + return True + try: + return after and self.source[end] in ' \t\n' + except IndexError: + return False + @classmethod def register(cls, symbol, **kwargs): """ diff --git a/elementpath/xpath1_parser.py b/elementpath/xpath1_parser.py index 464ccd16..ee18cf7f 100644 --- a/elementpath/xpath1_parser.py +++ b/elementpath/xpath1_parser.py @@ -178,7 +178,7 @@ def nud_(self): def next_is_path_step_token(self): return self.next_token.label == 'axis' or self.next_token.symbol in { '(integer)', '(string)', '(float)', '(decimal)', '(name)', 'node', 'text', '*', - '@', '..', '.', '(', '/', '{' + '@', '..', '.', '(', '{' } def parse(self, source): @@ -328,6 +328,8 @@ def led(self, left): elif left.symbol == '*' and next_token.symbol != '(name)': next_token.wrong_syntax() + if self.parser.is_spaced(): + self.wrong_syntax("a QName cannot contains spaces before or after ':'") self[:] = left, self.parser.expression(90) return self diff --git a/elementpath/xpath2_parser.py b/elementpath/xpath2_parser.py index 9cbe4d17..4fcf4fbf 100644 --- a/elementpath/xpath2_parser.py +++ b/elementpath/xpath2_parser.py @@ -310,7 +310,7 @@ def evaluate_(self_, context=None): def next_is_path_step_token(self): return 
self.next_token.label in ('axis', 'function') or self.next_token.symbol in { - '(integer)', '(string)', '(float)', '(decimal)', '(name)', '*', '@', '..', '.', '(', '/', '{' + '(integer)', '(string)', '(float)', '(decimal)', '(name)', '*', '@', '..', '.', '(', '{' } def next_is_sequence_type_token(self): diff --git a/setup.py b/setup.py index 670cad19..2e1f2c1d 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ setup( name='elementpath', - version='1.1.6', + version='1.1.7', packages=['elementpath'], author='Davide Brunato', author_email='brunato@sissa.it', diff --git a/tests/test_xpath1_parser.py b/tests/test_xpath1_parser.py index 64fdbe8c..61360cd4 100644 --- a/tests/test_xpath1_parser.py +++ b/tests/test_xpath1_parser.py @@ -265,6 +265,8 @@ def test_xpath_tokenizer(self): self.check_tokenizer("_last()", ['_last', '(', ')']) self.check_tokenizer("last ()", ['last', '', '(', ')']) self.check_tokenizer('child::text()', ['child', '::', 'text', '(', ')']) + self.check_tokenizer('./ /.', ['.', '/', '', '/', '.']) + self.check_tokenizer('tns :*', ['tns', '', ':', '*']) def test_tokens(self): # Literals @@ -319,8 +321,9 @@ def test_token_source(self): self.check_source("concat('alpha', 'beta', 'gamma')", "concat('alpha', 'beta', 'gamma')") self.check_source('1 +2 * 3 ', '1 + 2 * 3') self.check_source('(1 + 2) * 3', '(1 + 2) * 3') - self.check_source(' eg : example ', 'eg:example') + self.check_source(' eg:example ', 'eg:example') self.check_source('attribute::name="Galileo"', "attribute::name = 'Galileo'") + self.check_source(".//eg:a | .//eg:b", '. // eg:a | . // eg:b') def test_wrong_syntax(self): self.wrong_syntax('') @@ -329,6 +332,8 @@ def test_wrong_syntax(self): self.wrong_syntax("count(0, 1, 2)") self.wrong_syntax("{}egg") self.wrong_syntax("./*:*") + self.wrong_syntax('./ /.') + self.wrong_syntax(' eg : example ') # Features tests def test_references(self):