Commit 7043c3e
Merge pull request #1276 from lark-parser/cleanup_may2023
Cleanup may2023
erezsh authored May 25, 2023
2 parents 59aabdb + 41853ff commit 7043c3e
Showing 13 changed files with 70 additions and 47 deletions.
1 change: 1 addition & 0 deletions lark/lark.py
@@ -258,6 +258,7 @@ class Lark(Serialize):
     grammar: 'Grammar'
     options: LarkOptions
     lexer: Lexer
+    parser: 'ParsingFrontend'
     terminals: Collection[TerminalDef]
 
     def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
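
For context, the added parser annotation types an attribute that Lark instances already carry at runtime. A minimal sketch of what the annotation describes (the grammar here is illustrative, not part of this commit):

    from lark import Lark

    lk = Lark('start: "a"+', parser='lalr')
    frontend = lk.parser            # now visible to type checkers as a ParsingFrontend
    print(type(frontend).__name__)  # -> ParsingFrontend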
10 changes: 7 additions & 3 deletions lark/lexer.py
@@ -4,8 +4,8 @@
 import re
 from contextlib import suppress
 from typing import (
-    TypeVar, Type, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
-    Pattern as REPattern, ClassVar, TYPE_CHECKING, overload
+    TypeVar, Type, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
+    ClassVar, TYPE_CHECKING, overload
 )
 from types import ModuleType
 import warnings
@@ -404,7 +404,11 @@ class LexerState:
 
     __slots__ = 'text', 'line_ctr', 'last_token'
 
-    def __init__(self, text, line_ctr=None, last_token=None):
+    text: str
+    line_ctr: LineCounter
+    last_token: Optional[Token]
+
+    def __init__(self, text: str, line_ctr: Optional[LineCounter]=None, last_token: Optional[Token]=None):
         self.text = text
         self.line_ctr = line_ctr or LineCounter(b'\n' if isinstance(text, bytes) else '\n')
         self.last_token = last_token
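
For context, the new annotations document how LexerState is constructed internally. A minimal sketch against the now-typed constructor (internal API, shown only to illustrate the defaults; not part of this commit):

    from lark.lexer import LexerState, LineCounter

    state = LexerState("x = 1\n")    # line_ctr defaults to LineCounter('\n') for str input
    assert isinstance(state.line_ctr, LineCounter)
    assert state.last_token is None  # Optional[Token], starts empty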
49 changes: 28 additions & 21 deletions lark/parser_frontends.py
@@ -1,4 +1,4 @@
-from typing import Any, Callable, Dict, Tuple
+from typing import Any, Callable, Dict, Optional, Collection
 
 from .exceptions import ConfigurationError, GrammarError, assert_config
 from .utils import get_regexp_width, Serialize
@@ -38,7 +38,11 @@ def _deserialize_parsing_frontend(data, memo, lexer_conf, callbacks, options):
 class ParsingFrontend(Serialize):
     __serialize_fields__ = 'lexer_conf', 'parser_conf', 'parser'
 
-    def __init__(self, lexer_conf, parser_conf, options, parser=None):
+    lexer_conf: LexerConf
+    parser_conf: ParserConf
+    options: Any
+
+    def __init__(self, lexer_conf: LexerConf, parser_conf: ParserConf, options, parser=None):
         self.parser_conf = parser_conf
         self.lexer_conf = lexer_conf
         self.options = options
@@ -61,16 +65,17 @@ def __init__(self, lexer_conf, parser_conf, options, parser=None):
             self.skip_lexer = True
             return
 
-        try:
+        if isinstance(lexer_type, type):
+            assert issubclass(lexer_type, Lexer)
+            self.lexer = _wrap_lexer(lexer_type)(lexer_conf)
+        elif isinstance(lexer_type, str):
             create_lexer = {
                 'basic': create_basic_lexer,
                 'contextual': create_contextual_lexer,
             }[lexer_type]
-        except KeyError:
-            assert issubclass(lexer_type, Lexer), lexer_type
-            self.lexer = _wrap_lexer(lexer_type)(lexer_conf)
-        else:
             self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex, options)
+        else:
+            raise TypeError("Bad value for lexer_type: {lexer_type}")
 
         if lexer_conf.postlex:
             self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex)
@@ -85,21 +90,23 @@ def _verify_start(self, start=None):
             raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start))
         return start
 
-    def _make_lexer_thread(self, text):
+    def _make_lexer_thread(self, text: str):
         cls = (self.options and self.options._plugins.get('LexerThread')) or LexerThread
         return text if self.skip_lexer else cls.from_text(self.lexer, text)
 
-    def parse(self, text, start=None, on_error=None):
+    def parse(self, text: str, start=None, on_error=None):
         chosen_start = self._verify_start(start)
         kw = {} if on_error is None else {'on_error': on_error}
         stream = self._make_lexer_thread(text)
         return self.parser.parse(stream, chosen_start, **kw)
 
-    def parse_interactive(self, text=None, start=None):
+    def parse_interactive(self, text: Optional[str]=None, start=None):
+        # TODO BREAK - Change text from Optional[str] to text: str = ''.
+        # Would break behavior of exhaust_lexer(), which currently raises TypeError, and after the change would just return []
         chosen_start = self._verify_start(start)
         if self.parser_conf.parser_type != 'lalr':
             raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ")
-        stream = self._make_lexer_thread(text)
+        stream = self._make_lexer_thread(text)  # type: ignore[arg-type]
         return self.parser.parse_interactive(stream, chosen_start)
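
The TODO above concerns the public parse_interactive() entry point. A short usage sketch of that flow (grammar illustrative; relies on the documented InteractiveParser methods exhaust_lexer() and feed_eof()):

    from lark import Lark

    parser = Lark('start: "a"+', parser='lalr')  # parse_interactive() is LALR-only
    ip = parser.parse_interactive("aaa")
    ip.exhaust_lexer()    # feeds the remaining tokens; raises TypeError today when text is None
    tree = ip.feed_eof()  # completes the parse and returns the tree
    print(tree.data)      # -> start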


@@ -133,17 +140,17 @@ def lex(self, lexer_state, parser_state):
 
 
 
-def create_basic_lexer(lexer_conf, parser, postlex, options):
+def create_basic_lexer(lexer_conf, parser, postlex, options) -> BasicLexer:
     cls = (options and options._plugins.get('BasicLexer')) or BasicLexer
     return cls(lexer_conf)
 
-def create_contextual_lexer(lexer_conf, parser, postlex, options):
+def create_contextual_lexer(lexer_conf: LexerConf, parser, postlex, options) -> ContextualLexer:
     cls = (options and options._plugins.get('ContextualLexer')) or ContextualLexer
-    states = {idx:list(t.keys()) for idx, t in parser._parse_table.states.items()}
-    always_accept = postlex.always_accept if postlex else ()
+    states: Dict[str, Collection[str]] = {idx:list(t.keys()) for idx, t in parser._parse_table.states.items()}
+    always_accept: Collection[str] = postlex.always_accept if postlex else ()
     return cls(lexer_conf, states, always_accept=always_accept)
 
-def create_lalr_parser(lexer_conf, parser_conf, options=None):
+def create_lalr_parser(lexer_conf: LexerConf, parser_conf: ParserConf, options=None) -> LALR_Parser:
     debug = options.debug if options else False
     strict = options.strict if options else False
     cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser
@@ -174,7 +181,7 @@ def match(self, term, text, index=0):
         return self.regexps[term.name].match(text, index)
 
 
-def create_earley_parser__dynamic(lexer_conf, parser_conf, options=None, **kw):
+def create_earley_parser__dynamic(lexer_conf: LexerConf, parser_conf: ParserConf, **kw):
     if lexer_conf.callbacks:
         raise GrammarError("Earley's dynamic lexer doesn't support lexer_callbacks.")
 
@@ -184,10 +191,10 @@ def create_earley_parser__dynamic(lexer_conf, parser_conf, options=None, **kw):
 def _match_earley_basic(term, token):
     return term.name == token.type
 
-def create_earley_parser__basic(lexer_conf, parser_conf, options, **kw):
+def create_earley_parser__basic(lexer_conf: LexerConf, parser_conf: ParserConf, **kw):
     return earley.Parser(lexer_conf, parser_conf, _match_earley_basic, **kw)
 
-def create_earley_parser(lexer_conf, parser_conf, options):
+def create_earley_parser(lexer_conf: LexerConf, parser_conf: ParserConf, options) -> earley.Parser:
     resolve_ambiguity = options.ambiguity == 'resolve'
     debug = options.debug if options else False
     tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None
@@ -196,12 +203,12 @@ def create_earley_parser(lexer_conf, parser_conf, options):
     if lexer_conf.lexer_type == 'dynamic':
         f = create_earley_parser__dynamic
     elif lexer_conf.lexer_type == 'dynamic_complete':
-        extra['complete_lex'] =True
+        extra['complete_lex'] = True
         f = create_earley_parser__dynamic
     else:
         f = create_earley_parser__basic
 
-    return f(lexer_conf, parser_conf, options, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class, **extra)
+    return f(lexer_conf, parser_conf, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class, **extra)
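
These factory functions are normally reached through the Lark constructor, which maps user options onto the dispatch above. A sketch of the options involved (grammar illustrative):

    from lark import Lark

    # lexer='dynamic_complete' is what sets extra['complete_lex'] = True above;
    # ambiguity='resolve' (the default) makes create_earley_parser return a single tree.
    parser = Lark('start: WORD\n%import common.WORD', parser='earley',
                  lexer='dynamic_complete', ambiguity='resolve')
    print(parser.parse("abc").data)  # -> start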
13 changes: 4 additions & 9 deletions lark/parsers/cyk.py
@@ -13,11 +13,6 @@
 from ..tree import Tree
 from ..grammar import Terminal as T, NonTerminal as NT, Symbol
 
-try:
-    xrange
-except NameError:
-    xrange = range
-
 def match(t, s):
     assert isinstance(t, T)
     return t.name == s.type
@@ -153,11 +148,11 @@ def _parse(s, g):
             trees[(i, i)][rule.lhs] = RuleNode(rule, [T(w)], weight=rule.weight)
 
     # Iterate over lengths of sub-sentences
-    for l in xrange(2, len(s) + 1):
+    for l in range(2, len(s) + 1):
         # Iterate over sub-sentences with the given length
-        for i in xrange(len(s) - l + 1):
+        for i in range(len(s) - l + 1):
             # Choose partition of the sub-sentence in [1, l)
-            for p in xrange(i + 1, i + l):
+            for p in range(i + 1, i + l):
                 span1 = (i, p - 1)
                 span2 = (p, i + l - 1)
                 for r1, r2 in itertools.product(table[span1], table[span2]):
@@ -262,7 +257,7 @@ def _split(rule):
     rule_str = str(rule.lhs) + '__' + '_'.join(str(x) for x in rule.rhs)
     rule_name = '__SP_%s' % (rule_str) + '_%d'
     yield Rule(rule.lhs, [rule.rhs[0], NT(rule_name % 1)], weight=rule.weight, alias=rule.alias)
-    for i in xrange(1, len(rule.rhs) - 2):
+    for i in range(1, len(rule.rhs) - 2):
         yield Rule(NT(rule_name % i), [rule.rhs[i], NT(rule_name % (i + 1))], weight=0, alias='Split')
     yield Rule(NT(rule_name % (len(rule.rhs) - 2)), rule.rhs[-2:], weight=0, alias='Split')
 
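
The three loops above enumerate sub-spans bottom-up, exactly as in textbook CYK. A standalone sketch of the span enumeration they perform, for a three-token sentence:

    # Mirrors the (length, start, split) iteration in _parse above.
    s = ['w0', 'w1', 'w2']
    for l in range(2, len(s) + 1):
        for i in range(len(s) - l + 1):
            for p in range(i + 1, i + l):
                print((i, p - 1), (p, i + l - 1))  # the two sub-spans being combined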
14 changes: 12 additions & 2 deletions lark/parsers/earley.py
@@ -9,6 +9,8 @@
 is explained here: https://lark-parser.readthedocs.io/en/latest/_static/sppf/sppf.html
 """
 
+import typing
+
 from collections import deque
 
 from ..lexer import Token
@@ -20,8 +22,15 @@
 from .earley_common import Item
 from .earley_forest import ForestSumVisitor, SymbolNode, TokenNode, ForestToParseTree
 
+if typing.TYPE_CHECKING:
+    from ..common import LexerConf, ParserConf
+
 class Parser:
-    def __init__(self, lexer_conf, parser_conf, term_matcher, resolve_ambiguity=True, debug=False, tree_class=Tree):
+    lexer_conf: 'LexerConf'
+    parser_conf: 'ParserConf'
+    debug: bool
+
+    def __init__(self, lexer_conf: 'LexerConf', parser_conf: 'ParserConf', term_matcher, resolve_ambiguity=True, debug=False, tree_class=Tree):
         analysis = GrammarAnalyzer(parser_conf)
         self.lexer_conf = lexer_conf
         self.parser_conf = parser_conf
@@ -32,7 +41,8 @@ def __init__(self, lexer_conf, parser_conf, term_matcher, resolve_ambiguity=True
         self.FIRST = analysis.FIRST
         self.NULLABLE = analysis.NULLABLE
         self.callbacks = parser_conf.callbacks
-        self.predictions = {}
+        # TODO add typing info
+        self.predictions = {}  # type: ignore[var-annotated]
 
         ## These could be moved to the grammar analyzer. Pre-computing these is *much* faster than
         # the slow 'isupper' in is_terminal.
4 changes: 2 additions & 2 deletions lark/parsers/earley_forest.py
@@ -15,7 +15,6 @@
 
 from ..parse_tree_builder import AmbiguousIntermediateExpander
 from ..visitors import Discard
-from ..lexer import Token
 from ..utils import logger
 from ..tree import Tree
 
@@ -85,7 +84,8 @@ def is_ambiguous(self):
     def children(self):
         """Returns a list of this node's children sorted from greatest to
         least priority."""
-        if not self.paths_loaded: self.load_paths()
+        if not self.paths_loaded:
+            self.load_paths()
         return sorted(self._children, key=attrgetter('sort_key'))
 
     def __iter__(self):
2 changes: 1 addition & 1 deletion lark/parsers/lalr_analysis.py
@@ -220,7 +220,7 @@ def compute_includes_lookback(self):
                 if nt2 not in self.reads:
                     continue
                 for j in range(i + 1, len(rp.rule.expansion)):
-                    if not rp.rule.expansion[j] in self.NULLABLE:
+                    if rp.rule.expansion[j] not in self.NULLABLE:
                         break
                 else:
                     includes.append(nt2)
2 changes: 1 addition & 1 deletion lark/parsers/lalr_parser.py
@@ -7,7 +7,7 @@
 from ..lexer import Token
 from ..utils import Serialize
 
-from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
+from .lalr_analysis import LALR_Analyzer, Shift, IntParseTable
 from .lalr_interactive_parser import InteractiveParser
 from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
 
2 changes: 1 addition & 1 deletion lark/reconstruct.py
@@ -1,6 +1,6 @@
 """Reconstruct text from a tree, based on Lark grammar"""
 
-from typing import List, Dict, Union, Callable, Iterable, Optional
+from typing import Dict, Callable, Iterable, Optional
 
 from .lark import Lark
 from .tree import Tree, ParseTree
4 changes: 1 addition & 3 deletions lark/tools/serialize.py
@@ -1,9 +1,7 @@
-import codecs
 import sys
 import json
 
 from lark import Lark
-from lark.grammar import RuleOptions, Rule
+from lark.grammar import Rule
 from lark.lexer import TerminalDef
 from lark.tools import lalr_argparser, build_lalr
-
9 changes: 6 additions & 3 deletions lark/tree.py
@@ -1,11 +1,14 @@
 import sys
 from copy import deepcopy
 
-from typing import List, Callable, Iterator, Union, Optional, Generic, TypeVar, Any, TYPE_CHECKING
+from typing import List, Callable, Iterator, Union, Optional, Generic, TypeVar, TYPE_CHECKING
 
 if TYPE_CHECKING:
     from .lexer import TerminalDef, Token
-    import rich
+    try:
+        import rich
+    except ImportError:
+        pass
 if sys.version_info >= (3, 8):
     from typing import Literal
 else:
@@ -86,7 +89,7 @@ def pretty(self, indent_str: str=' ') -> str:
         """
         return ''.join(self._pretty(0, indent_str))
 
-    def __rich__(self, parent:'rich.tree.Tree'=None) -> 'rich.tree.Tree':
+    def __rich__(self, parent:Optional['rich.tree.Tree']=None) -> 'rich.tree.Tree':
         """Returns a tree widget for the 'rich' library.
 
         Example:
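
The Optional default matches how rich invokes __rich__ with no arguments. A quick sketch of the feature being annotated (requires the optional rich package; grammar illustrative):

    from lark import Lark
    from rich import print as rprint

    tree = Lark('start: WORD\n%import common.WORD').parse("hello")
    rprint(tree)  # rendered via Tree.__rich__; parent stays None at the top level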
1 change: 0 additions & 1 deletion lark/utils.py
@@ -1,6 +1,5 @@
 import unicodedata
 import os
-from functools import reduce
 from itertools import product
 from collections import deque
 from typing import Callable, Iterator, List, Optional, Tuple, Type, TypeVar, Union, Dict, Any, Sequence, Iterable
6 changes: 6 additions & 0 deletions pyproject.toml
@@ -16,3 +16,9 @@ exclude = [
 [[tool.mypy.overrides]]
 module = [ "js2py" ]
 ignore_missing_imports = true
+
+[tool.coverage.report]
+exclude_lines = [
+    "pragma: no cover",
+    "if TYPE_CHECKING:"
+]
