Commit 7043c3e
Merge pull request #1276 from lark-parser/cleanup_may2023
Cleanup may2023
erezsh authored May 25, 2023
2 parents 59aabdb + 41853ff commit 7043c3e
Showing 13 changed files with 70 additions and 47 deletions.
1 change: 1 addition & 0 deletions lark/lark.py
@@ -258,6 +258,7 @@ class Lark(Serialize):
     grammar: 'Grammar'
     options: LarkOptions
     lexer: Lexer
+    parser: 'ParsingFrontend'
     terminals: Collection[TerminalDef]
 
     def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
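
For context, the added parser annotation types an attribute that Lark instances already carry at runtime. A minimal sketch of what the annotation describes (the grammar here is illustrative, not part of this commit):

    from lark import Lark

    lk = Lark('start: "a"+', parser='lalr')
    frontend = lk.parser            # now visible to type checkers as a ParsingFrontend
    print(type(frontend).__name__)  # -> ParsingFrontend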
10 changes: 7 additions & 3 deletions lark/lexer.py
@@ -4,8 +4,8 @@
 import re
 from contextlib import suppress
 from typing import (
-    TypeVar, Type, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
-    Pattern as REPattern, ClassVar, TYPE_CHECKING, overload
+    TypeVar, Type, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
+    ClassVar, TYPE_CHECKING, overload
 )
 from types import ModuleType
 import warnings
@@ -404,7 +404,11 @@ class LexerState:
 
     __slots__ = 'text', 'line_ctr', 'last_token'
 
-    def __init__(self, text, line_ctr=None, last_token=None):
+    text: str
+    line_ctr: LineCounter
+    last_token: Optional[Token]
+
+    def __init__(self, text: str, line_ctr: Optional[LineCounter]=None, last_token: Optional[Token]=None):
         self.text = text
         self.line_ctr = line_ctr or LineCounter(b'\n' if isinstance(text, bytes) else '\n')
         self.last_token = last_token
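
For context, the new annotations document how LexerState is constructed internally. A minimal sketch against the now-typed constructor (internal API, shown only to illustrate the defaults; not part of this commit):

    from lark.lexer import LexerState, LineCounter

    state = LexerState("x = 1\n")    # line_ctr defaults to LineCounter('\n') for str input
    assert isinstance(state.line_ctr, LineCounter)
    assert state.last_token is None  # Optional[Token], starts empty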
49 changes: 28 additions & 21 deletions lark/parser_frontends.py
@@ -1,4 +1,4 @@
-from typing import Any, Callable, Dict, Tuple
+from typing import Any, Callable, Dict, Optional, Collection
 
 from .exceptions import ConfigurationError, GrammarError, assert_config
 from .utils import get_regexp_width, Serialize
@@ -38,7 +38,11 @@ def _deserialize_parsing_frontend(data, memo, lexer_conf, callbacks, options):
 class ParsingFrontend(Serialize):
     __serialize_fields__ = 'lexer_conf', 'parser_conf', 'parser'
 
-    def __init__(self, lexer_conf, parser_conf, options, parser=None):
+    lexer_conf: LexerConf
+    parser_conf: ParserConf
+    options: Any
+
+    def __init__(self, lexer_conf: LexerConf, parser_conf: ParserConf, options, parser=None):
         self.parser_conf = parser_conf
         self.lexer_conf = lexer_conf
         self.options = options
@@ -61,16 +65,17 @@ def __init__(self, lexer_conf, parser_conf, options, parser=None):
             self.skip_lexer = True
             return
 
-        try:
+        if isinstance(lexer_type, type):
+            assert issubclass(lexer_type, Lexer)
+            self.lexer = _wrap_lexer(lexer_type)(lexer_conf)
+        elif isinstance(lexer_type, str):
             create_lexer = {
                 'basic': create_basic_lexer,
                 'contextual': create_contextual_lexer,
             }[lexer_type]
-        except KeyError:
-            assert issubclass(lexer_type, Lexer), lexer_type
-            self.lexer = _wrap_lexer(lexer_type)(lexer_conf)
-        else:
             self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex, options)
+        else:
+            raise TypeError("Bad value for lexer_type: {lexer_type}")
 
         if lexer_conf.postlex:
             self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex)
@@ -85,21 +90,23 @@ def _verify_start(self, start=None):
             raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start))
         return start
 
-    def _make_lexer_thread(self, text):
+    def _make_lexer_thread(self, text: str):
         cls = (self.options and self.options._plugins.get('LexerThread')) or LexerThread
         return text if self.skip_lexer else cls.from_text(self.lexer, text)
 
-    def parse(self, text, start=None, on_error=None):
+    def parse(self, text: str, start=None, on_error=None):
         chosen_start = self._verify_start(start)
         kw = {} if on_error is None else {'on_error': on_error}
         stream = self._make_lexer_thread(text)
         return self.parser.parse(stream, chosen_start, **kw)
 
-    def parse_interactive(self, text=None, start=None):
+    def parse_interactive(self, text: Optional[str]=None, start=None):
+        # TODO BREAK - Change text from Optional[str] to text: str = ''.
+        # Would break behavior of exhaust_lexer(), which currently raises TypeError, and after the change would just return []
         chosen_start = self._verify_start(start)
         if self.parser_conf.parser_type != 'lalr':
             raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ")
-        stream = self._make_lexer_thread(text)
+        stream = self._make_lexer_thread(text)  # type: ignore[arg-type]
         return self.parser.parse_interactive(stream, chosen_start)
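
The TODO above concerns the public parse_interactive() entry point. A short usage sketch of that flow (grammar illustrative; relies on the documented InteractiveParser methods exhaust_lexer() and feed_eof()):

    from lark import Lark

    parser = Lark('start: "a"+', parser='lalr')  # parse_interactive() is LALR-only
    ip = parser.parse_interactive("aaa")
    ip.exhaust_lexer()    # feeds the remaining tokens; raises TypeError today when text is None
    tree = ip.feed_eof()  # completes the parse and returns the tree
    print(tree.data)      # -> start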


@@ -133,17 +140,17 @@ def lex(self, lexer_state, parser_state):
 
 
 
-def create_basic_lexer(lexer_conf, parser, postlex, options):
+def create_basic_lexer(lexer_conf, parser, postlex, options) -> BasicLexer:
     cls = (options and options._plugins.get('BasicLexer')) or BasicLexer
     return cls(lexer_conf)
 
-def create_contextual_lexer(lexer_conf, parser, postlex, options):
+def create_contextual_lexer(lexer_conf: LexerConf, parser, postlex, options) -> ContextualLexer:
     cls = (options and options._plugins.get('ContextualLexer')) or ContextualLexer
-    states = {idx:list(t.keys()) for idx, t in parser._parse_table.states.items()}
-    always_accept = postlex.always_accept if postlex else ()
+    states: Dict[str, Collection[str]] = {idx:list(t.keys()) for idx, t in parser._parse_table.states.items()}
+    always_accept: Collection[str] = postlex.always_accept if postlex else ()
     return cls(lexer_conf, states, always_accept=always_accept)
 
-def create_lalr_parser(lexer_conf, parser_conf, options=None):
+def create_lalr_parser(lexer_conf: LexerConf, parser_conf: ParserConf, options=None) -> LALR_Parser:
     debug = options.debug if options else False
     strict = options.strict if options else False
     cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser
@@ -174,7 +181,7 @@ def match(self, term, text, index=0):
         return self.regexps[term.name].match(text, index)
 
 
-def create_earley_parser__dynamic(lexer_conf, parser_conf, options=None, **kw):
+def create_earley_parser__dynamic(lexer_conf: LexerConf, parser_conf: ParserConf, **kw):
     if lexer_conf.callbacks:
         raise GrammarError("Earley's dynamic lexer doesn't support lexer_callbacks.")
 
@@ -184,10 +191,10 @@ def create_earley_parser__dynamic(lexer_conf, parser_conf, options=None, **kw):
 def _match_earley_basic(term, token):
     return term.name == token.type
 
-def create_earley_parser__basic(lexer_conf, parser_conf, options, **kw):
+def create_earley_parser__basic(lexer_conf: LexerConf, parser_conf: ParserConf, **kw):
     return earley.Parser(lexer_conf, parser_conf, _match_earley_basic, **kw)
 
-def create_earley_parser(lexer_conf, parser_conf, options):
+def create_earley_parser(lexer_conf: LexerConf, parser_conf: ParserConf, options) -> earley.Parser:
     resolve_ambiguity = options.ambiguity == 'resolve'
     debug = options.debug if options else False
     tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None
@@ -196,12 +203,12 @@ def create_earley_parser(lexer_conf, parser_conf, options):
     if lexer_conf.lexer_type == 'dynamic':
         f = create_earley_parser__dynamic
     elif lexer_conf.lexer_type == 'dynamic_complete':
-        extra['complete_lex'] =True
+        extra['complete_lex'] = True
         f = create_earley_parser__dynamic
     else:
         f = create_earley_parser__basic
 
-    return f(lexer_conf, parser_conf, options, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class, **extra)
+    return f(lexer_conf, parser_conf, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class, **extra)
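
These factory functions are normally reached through the Lark constructor, which maps user options onto the dispatch above. A sketch of the options involved (grammar illustrative):

    from lark import Lark

    # lexer='dynamic_complete' is what sets extra['complete_lex'] = True above;
    # ambiguity='resolve' (the default) makes create_earley_parser return a single tree.
    parser = Lark('start: WORD\n%import common.WORD', parser='earley',
                  lexer='dynamic_complete', ambiguity='resolve')
    print(parser.parse("abc").data)  # -> start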
13 changes: 4 additions & 9 deletions lark/parsers/cyk.py
@@ -13,11 +13,6 @@
 from ..tree import Tree
 from ..grammar import Terminal as T, NonTerminal as NT, Symbol
 
-try:
-    xrange
-except NameError:
-    xrange = range
-
 def match(t, s):
     assert isinstance(t, T)
     return t.name == s.type
@@ -153,11 +148,11 @@ def _parse(s, g):
             trees[(i, i)][rule.lhs] = RuleNode(rule, [T(w)], weight=rule.weight)
 
     # Iterate over lengths of sub-sentences
-    for l in xrange(2, len(s) + 1):
+    for l in range(2, len(s) + 1):
         # Iterate over sub-sentences with the given length
-        for i in xrange(len(s) - l + 1):
+        for i in range(len(s) - l + 1):
             # Choose partition of the sub-sentence in [1, l)
-            for p in xrange(i + 1, i + l):
+            for p in range(i + 1, i + l):
                 span1 = (i, p - 1)
                 span2 = (p, i + l - 1)
                 for r1, r2 in itertools.product(table[span1], table[span2]):
@@ -262,7 +257,7 @@ def _split(rule):
     rule_str = str(rule.lhs) + '__' + '_'.join(str(x) for x in rule.rhs)
     rule_name = '__SP_%s' % (rule_str) + '_%d'
     yield Rule(rule.lhs, [rule.rhs[0], NT(rule_name % 1)], weight=rule.weight, alias=rule.alias)
-    for i in xrange(1, len(rule.rhs) - 2):
+    for i in range(1, len(rule.rhs) - 2):
         yield Rule(NT(rule_name % i), [rule.rhs[i], NT(rule_name % (i + 1))], weight=0, alias='Split')
     yield Rule(NT(rule_name % (len(rule.rhs) - 2)), rule.rhs[-2:], weight=0, alias='Split')
 
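
The three loops above enumerate sub-spans bottom-up, exactly as in textbook CYK. A standalone sketch of the span enumeration they perform, for a three-token sentence:

    # Mirrors the (length, start, split) iteration in _parse above.
    s = ['w0', 'w1', 'w2']
    for l in range(2, len(s) + 1):
        for i in range(len(s) - l + 1):
            for p in range(i + 1, i + l):
                print((i, p - 1), (p, i + l - 1))  # the two sub-spans being combined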
14 changes: 12 additions & 2 deletions lark/parsers/earley.py
@@ -9,6 +9,8 @@
 is explained here: https://lark-parser.readthedocs.io/en/latest/_static/sppf/sppf.html
 """
 
+import typing
+
 from collections import deque
 
 from ..lexer import Token
@@ -20,8 +22,15 @@
 from .earley_common import Item
 from .earley_forest import ForestSumVisitor, SymbolNode, TokenNode, ForestToParseTree
 
+if typing.TYPE_CHECKING:
+    from ..common import LexerConf, ParserConf
+
 class Parser:
-    def __init__(self, lexer_conf, parser_conf, term_matcher, resolve_ambiguity=True, debug=False, tree_class=Tree):
+    lexer_conf: 'LexerConf'
+    parser_conf: 'ParserConf'
+    debug: bool
+
+    def __init__(self, lexer_conf: 'LexerConf', parser_conf: 'ParserConf', term_matcher, resolve_ambiguity=True, debug=False, tree_class=Tree):
         analysis = GrammarAnalyzer(parser_conf)
         self.lexer_conf = lexer_conf
         self.parser_conf = parser_conf
@@ -32,7 +41,8 @@ def __init__(self, lexer_conf, parser_conf, term_matcher, resolve_ambiguity=True
         self.FIRST = analysis.FIRST
         self.NULLABLE = analysis.NULLABLE
         self.callbacks = parser_conf.callbacks
-        self.predictions = {}
+        # TODO add typing info
+        self.predictions = {}  # type: ignore[var-annotated]
 
         ## These could be moved to the grammar analyzer. Pre-computing these is *much* faster than
         # the slow 'isupper' in is_terminal.
4 changes: 2 additions & 2 deletions lark/parsers/earley_forest.py
@@ -15,7 +15,6 @@
 
 from ..parse_tree_builder import AmbiguousIntermediateExpander
 from ..visitors import Discard
-from ..lexer import Token
 from ..utils import logger
 from ..tree import Tree
 
@@ -85,7 +84,8 @@ def is_ambiguous(self):
     def children(self):
         """Returns a list of this node's children sorted from greatest to
         least priority."""
-        if not self.paths_loaded: self.load_paths()
+        if not self.paths_loaded:
+            self.load_paths()
         return sorted(self._children, key=attrgetter('sort_key'))
 
     def __iter__(self):
2 changes: 1 addition & 1 deletion lark/parsers/lalr_analysis.py
@@ -220,7 +220,7 @@ def compute_includes_lookback(self):
                 if nt2 not in self.reads:
                     continue
                 for j in range(i + 1, len(rp.rule.expansion)):
-                    if not rp.rule.expansion[j] in self.NULLABLE:
+                    if rp.rule.expansion[j] not in self.NULLABLE:
                         break
                 else:
                     includes.append(nt2)
2 changes: 1 addition & 1 deletion lark/parsers/lalr_parser.py
@@ -7,7 +7,7 @@
 from ..lexer import Token
 from ..utils import Serialize
 
-from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
+from .lalr_analysis import LALR_Analyzer, Shift, IntParseTable
 from .lalr_interactive_parser import InteractiveParser
 from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken
 
2 changes: 1 addition & 1 deletion lark/reconstruct.py
@@ -1,6 +1,6 @@
 """Reconstruct text from a tree, based on Lark grammar"""
 
-from typing import List, Dict, Union, Callable, Iterable, Optional
+from typing import Dict, Callable, Iterable, Optional
 
 from .lark import Lark
 from .tree import Tree, ParseTree
4 changes: 1 addition & 3 deletions lark/tools/serialize.py
@@ -1,9 +1,7 @@
-import codecs
 import sys
 import json
 
 from lark import Lark
-from lark.grammar import RuleOptions, Rule
+from lark.grammar import Rule
 from lark.lexer import TerminalDef
 from lark.tools import lalr_argparser, build_lalr
-
9 changes: 6 additions & 3 deletions lark/tree.py
@@ -1,11 +1,14 @@
 import sys
 from copy import deepcopy
 
-from typing import List, Callable, Iterator, Union, Optional, Generic, TypeVar, Any, TYPE_CHECKING
+from typing import List, Callable, Iterator, Union, Optional, Generic, TypeVar, TYPE_CHECKING
 
 if TYPE_CHECKING:
     from .lexer import TerminalDef, Token
-    import rich
+    try:
+        import rich
+    except ImportError:
+        pass
 if sys.version_info >= (3, 8):
     from typing import Literal
 else:
@@ -86,7 +89,7 @@ def pretty(self, indent_str: str=' ') -> str:
         """
         return ''.join(self._pretty(0, indent_str))
 
-    def __rich__(self, parent:'rich.tree.Tree'=None) -> 'rich.tree.Tree':
+    def __rich__(self, parent:Optional['rich.tree.Tree']=None) -> 'rich.tree.Tree':
         """Returns a tree widget for the 'rich' library.
 
         Example:
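
The Optional default matches how rich invokes __rich__ with no arguments. A quick sketch of the feature being annotated (requires the optional rich package; grammar illustrative):

    from lark import Lark
    from rich import print as rprint

    tree = Lark('start: WORD\n%import common.WORD').parse("hello")
    rprint(tree)  # rendered via Tree.__rich__; parent stays None at the top level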
1 change: 0 additions & 1 deletion lark/utils.py
@@ -1,6 +1,5 @@
 import unicodedata
 import os
-from functools import reduce
 from itertools import product
 from collections import deque
 from typing import Callable, Iterator, List, Optional, Tuple, Type, TypeVar, Union, Dict, Any, Sequence, Iterable
6 changes: 6 additions & 0 deletions pyproject.toml
@@ -16,3 +16,9 @@ exclude = [
 [[tool.mypy.overrides]]
 module = [ "js2py" ]
 ignore_missing_imports = true
+
+[tool.coverage.report]
+exclude_lines = [
+    "pragma: no cover",
+    "if TYPE_CHECKING:"
+]
