Skip to content

Commit

Permalink
Fix spacing errors
Browse files Browse the repository at this point in the history
  - Added is_spaced() to TDOP Parser base class
  - Remove '/' from next_is_path_step_token() methods
  - Increase release and update API documentation
  • Loading branch information
brunato committed Mar 30, 2019
1 parent 66dcdf2 commit 12b71ba
Show file tree
Hide file tree
Showing 9 changed files with 52 additions and 10 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
CHANGELOG
*********

`v1.1.7`_ (TBD)
===============
* Added Parser.is_spaced() method for checking if the current token has extra spaces before or after
* Fixes for '/' and ':' tokens

`v1.1.6`_ (2019-03-28)
======================
* Fixes for XSD datatypes
Expand Down Expand Up @@ -116,3 +121,4 @@ CHANGELOG
.. _v1.1.4: https://github.com/brunato/elementpath/compare/v1.1.3...v1.1.4
.. _v1.1.5: https://github.com/brunato/elementpath/compare/v1.1.4...v1.1.5
.. _v1.1.6: https://github.com/brunato/elementpath/compare/v1.1.5...v1.1.6
.. _v1.1.7: https://github.com/brunato/elementpath/compare/v1.1.6...v1.1.7
2 changes: 1 addition & 1 deletion doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
# The short X.Y version
version = ''
# The full version, including alpha/beta/rc tags
release = '1.1.6'
release = '1.1.7'


# -- General configuration ---------------------------------------------------
Expand Down
8 changes: 8 additions & 0 deletions doc/pratt_api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ Parser base class

.. autoclass:: elementpath.Parser

.. autoattribute:: position

Parsing methods:

.. automethod:: build_tokenizer
Expand All @@ -46,6 +48,12 @@ Parser base class
.. automethod:: raw_advance
.. automethod:: expression

Helper methods for checking parser status:

.. automethod:: is_source_start
.. automethod:: is_line_start
.. automethod:: is_spaced

Helper methods for building effective parser classes:

.. automethod:: register
Expand Down
2 changes: 1 addition & 1 deletion elementpath/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#
# @author Davide Brunato <[email protected]>
#
__version__ = '1.1.6'
__version__ = '1.1.7'
__author__ = "Davide Brunato"
__contact__ = "[email protected]"
__copyright__ = "Copyright 2018-2019, SISSA"
Expand Down
29 changes: 25 additions & 4 deletions elementpath/tdop_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,12 +287,15 @@ def wrong_syntax(self, message=None):
symbol = self.value if SPECIAL_SYMBOL_PATTERN.match(self.symbol) is not None else self.symbol
line_column = 'line %d, column %d' % self.parser.position
token = self.parser.token

if token is not None and symbol != token.symbol:
msg = "unexpected symbol %r after %s at %s." % (symbol, token, line_column)
msg = "symbol %r after %s at %s" % (symbol, token, line_column)
else:
msg = "symbol %r at %s" % (symbol, line_column)

if message:
raise ElementPathSyntaxError('%s: %s' % (msg, message), self)
else:
msg = "unexpected symbol %r at %s." % (symbol, line_column)
raise ElementPathSyntaxError(msg + ' ' + message if message else msg, self)
raise ElementPathSyntaxError('unexpected %s.' % msg, self)

def wrong_value(self, message='unknown error'):
raise ElementPathValueError(message, self)
Expand Down Expand Up @@ -555,6 +558,24 @@ def is_line_start(self):
line_start = self.source[0:token_index].rindex('\n') + 1
return not bool(self.source[line_start:token_index].strip())

def is_spaced(self, before=True, after=True):
    """
    Checks if the currently matched token is adjacent to extra whitespace
    (space, tab or newline) in the parsed source.

    :param before: when `True` also check for a space character immediately \
    preceding the matched token.
    :param after: when `True` also check for a space character immediately \
    following the matched token.
    :return: `True` if an adjacent extra space is found, `False` otherwise \
    (also when there is no current match).
    """
    if self.match is None:
        return False

    spaces = ' \t\n'
    start, end = self.match.span()
    if before and start and self.source[start - 1] in spaces:
        return True
    if after and end < len(self.source) and self.source[end] in spaces:
        return True
    return False

@classmethod
def register(cls, symbol, **kwargs):
"""
Expand Down
4 changes: 3 additions & 1 deletion elementpath/xpath1_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def nud_(self):
def next_is_path_step_token(self):
    """Returns `True` if the next token can start an XPath 1.0 path step."""
    token = self.next_token
    if token.label == 'axis':
        return True
    return token.symbol in {
        '(integer)', '(string)', '(float)', '(decimal)', '(name)',
        'node', 'text', '*', '@', '..', '.', '(', '{'
    }

def parse(self, source):
Expand Down Expand Up @@ -328,6 +328,8 @@ def led(self, left):
elif left.symbol == '*' and next_token.symbol != '(name)':
next_token.wrong_syntax()

if self.parser.is_spaced():
self.wrong_syntax("a QName cannot contains spaces before or after ':'")
self[:] = left, self.parser.expression(90)
return self

Expand Down
2 changes: 1 addition & 1 deletion elementpath/xpath2_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ def evaluate_(self_, context=None):

def next_is_path_step_token(self):
    """Returns `True` if the next token can start an XPath 2.0 path step."""
    token = self.next_token
    if token.label in ('axis', 'function'):
        return True
    return token.symbol in {
        '(integer)', '(string)', '(float)', '(decimal)', '(name)',
        '*', '@', '..', '.', '(', '{'
    }

def next_is_sequence_type_token(self):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

setup(
name='elementpath',
version='1.1.6',
version='1.1.7',
packages=['elementpath'],
author='Davide Brunato',
author_email='[email protected]',
Expand Down
7 changes: 6 additions & 1 deletion tests/test_xpath1_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,8 @@ def test_xpath_tokenizer(self):
self.check_tokenizer("_last()", ['_last', '(', ')'])
self.check_tokenizer("last ()", ['last', '', '(', ')'])
self.check_tokenizer('child::text()', ['child', '::', 'text', '(', ')'])
self.check_tokenizer('./ /.', ['.', '/', '', '/', '.'])
self.check_tokenizer('tns :*', ['tns', '', ':', '*'])

def test_tokens(self):
# Literals
Expand Down Expand Up @@ -319,8 +321,9 @@ def test_token_source(self):
self.check_source("concat('alpha', 'beta', 'gamma')", "concat('alpha', 'beta', 'gamma')")
self.check_source('1 +2 * 3 ', '1 + 2 * 3')
self.check_source('(1 + 2) * 3', '(1 + 2) * 3')
self.check_source(' eg : example ', 'eg:example')
self.check_source(' eg:example ', 'eg:example')
self.check_source('attribute::name="Galileo"', "attribute::name = 'Galileo'")
self.check_source(".//eg:a | .//eg:b", '. // eg:a | . // eg:b')

def test_wrong_syntax(self):
self.wrong_syntax('')
Expand All @@ -329,6 +332,8 @@ def test_wrong_syntax(self):
self.wrong_syntax("count(0, 1, 2)")
self.wrong_syntax("{}egg")
self.wrong_syntax("./*:*")
self.wrong_syntax('./ /.')
self.wrong_syntax(' eg : example ')

# Features tests
def test_references(self):
Expand Down

0 comments on commit 12b71ba

Please sign in to comment.