1# The file was automatically generated by Lark v1.1.2
2__version__ = "1.1.2"
3
4#
5#
6# Lark Stand-alone Generator Tool
7# ----------------------------------
8# Generates a stand-alone LALR(1) parser
9#
10# Git: https://github.com/erezsh/lark
11# Author: Erez Shinan (erezshin@gmail.com)
12#
13#
14# >>> LICENSE
15#
16# This tool and its generated code use a separate license from Lark,
17# and are subject to the terms of the Mozilla Public License, v. 2.0.
18# If a copy of the MPL was not distributed with this
19# file, You can obtain one at https://mozilla.org/MPL/2.0/.
20#
21# If you wish to purchase a commercial license for this tool and its
22# generated code, you may contact me via email or otherwise.
23#
24# If MPL2 is incompatible with your free or open-source project,
25# contact me and we'll work it out.
26#
27#
28
29from abc import ABC, abstractmethod
30from collections.abc import Sequence
31from types import ModuleType
32from typing import (
33 IO,
34 TYPE_CHECKING,
35 Any,
36 Callable,
37 ClassVar,
38 Collection,
39 Dict,
40 FrozenSet,
41 Generic,
42 Iterable,
43 Iterator,
44 List,
45 Mapping,
46 Optional,
47)
48from typing import Pattern as REPattern
from typing import Set, Tuple, Type, TypeVar, Union, cast   # 'cast' is used by the transformer classes below
50
51
52class LarkError(Exception):
53 pass
54
55
56class ConfigurationError(LarkError, ValueError):
57 pass
58
59
60def assert_config(value, options: Collection, msg='Got %r, expected one of %s'):
61 if value not in options:
62 raise ConfigurationError(msg % (value, options))
63
64
65class GrammarError(LarkError):
66 pass
67
68
69class ParseError(LarkError):
70 pass
71
72
73class LexError(LarkError):
74 pass
75
76T = TypeVar('T')
77
78class UnexpectedInput(LarkError):
79 #--
80 line: int
81 column: int
82 pos_in_stream = None
83 state: Any
84 _terminals_by_name = None
85
86 def get_context(self, text: str, span: int=40) -> str:
87 #--
88 assert self.pos_in_stream is not None, self
89 pos = self.pos_in_stream
90 start = max(pos - span, 0)
91 end = pos + span
92 if not isinstance(text, bytes):
93 before = text[start:pos].rsplit('\n', 1)[-1]
94 after = text[pos:end].split('\n', 1)[0]
95 return before + after + '\n' + ' ' * len(before.expandtabs()) + '^\n'
96 else:
97 before = text[start:pos].rsplit(b'\n', 1)[-1]
98 after = text[pos:end].split(b'\n', 1)[0]
99 return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace")
100
101 def match_examples(self, parse_fn: 'Callable[[str], Tree]',
102 examples: Union[Mapping[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
103 token_type_match_fallback: bool=False,
104 use_accepts: bool=True
105 ) -> Optional[T]:
106 #--
107 assert self.state is not None, "Not supported for this exception"
108
109 if isinstance(examples, Mapping):
110 examples = examples.items()
111
112 candidate = (None, False)
113 for i, (label, example) in enumerate(examples):
114 assert not isinstance(example, str), "Expecting a list"
115
116 for j, malformed in enumerate(example):
117 try:
118 parse_fn(malformed)
119 except UnexpectedInput as ut:
120 if ut.state == self.state:
121 if (
122 use_accepts
123 and isinstance(self, UnexpectedToken)
124 and isinstance(ut, UnexpectedToken)
125 and ut.accepts != self.accepts
126 ):
127 logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
128 (self.state, self.accepts, ut.accepts, i, j))
129 continue
130 if (
131 isinstance(self, (UnexpectedToken, UnexpectedEOF))
132 and isinstance(ut, (UnexpectedToken, UnexpectedEOF))
133 ):
134 if ut.token == self.token: ##
135
136 logger.debug("Exact Match at example [%s][%s]" % (i, j))
137 return label
138
139 if token_type_match_fallback:
140 ##
141
142 if (ut.token.type == self.token.type) and not candidate[-1]:
143 logger.debug("Token Type Fallback at example [%s][%s]" % (i, j))
144 candidate = label, True
145
146 if candidate[0] is None:
147 logger.debug("Same State match at example [%s][%s]" % (i, j))
148 candidate = label, False
149
150 return candidate[0]
151
152 def _format_expected(self, expected):
153 if self._terminals_by_name:
154 d = self._terminals_by_name
155 expected = [d[t_name].user_repr() if t_name in d else t_name for t_name in expected]
156 return "Expected one of: \n\t* %s\n" % '\n\t* '.join(expected)
157
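# Editor's illustrative sketch (not emitted by the Lark generator): a typical
# pattern for turning an UnexpectedInput into a friendlier message. The
# `parse` callable and the example strings below are hypothetical placeholders.
def _example_error_reporting(parse, text):
    error_examples = {
        'missing value': ['key = ', 'key =\n'],
        'unclosed string': ['key = "abc', "key = 'abc"],
    }
    try:
        return parse(text)
    except UnexpectedInput as u:
        label = u.match_examples(parse, error_examples, use_accepts=True)
        context = u.get_context(text)
        raise SyntaxError('%s at line %s column %s:\n\n%s'
                          % (label or 'unknown error', u.line, u.column, context))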
158
159class UnexpectedEOF(ParseError, UnexpectedInput):
160 #--
161 expected: 'List[Token]'
162
163 def __init__(self, expected, state=None, terminals_by_name=None):
164 super(UnexpectedEOF, self).__init__()
165
166 self.expected = expected
167 self.state = state
        ## `Token` is defined later in this module, so the relative import that
        ## appeared here in the package source is unnecessary (and would fail in
        ## a standalone, single-file module).
        self.token = Token("<EOF>", "")
170
171 self.pos_in_stream = -1
172 self.line = -1
173 self.column = -1
174 self._terminals_by_name = terminals_by_name
175
176
177 def __str__(self):
178 message = "Unexpected end-of-input. "
179 message += self._format_expected(self.expected)
180 return message
181
182
183class UnexpectedCharacters(LexError, UnexpectedInput):
184 #--
185
186 allowed: Set[str]
187 considered_tokens: Set[Any]
188
189 def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None,
190 terminals_by_name=None, considered_rules=None):
191 super(UnexpectedCharacters, self).__init__()
192
193 ##
194
195 self.line = line
196 self.column = column
197 self.pos_in_stream = lex_pos
198 self.state = state
199 self._terminals_by_name = terminals_by_name
200
201 self.allowed = allowed
202 self.considered_tokens = considered_tokens
203 self.considered_rules = considered_rules
204 self.token_history = token_history
205
206 if isinstance(seq, bytes):
207 self.char = seq[lex_pos:lex_pos + 1].decode("ascii", "backslashreplace")
208 else:
209 self.char = seq[lex_pos]
210 self._context = self.get_context(seq)
211
212
213 def __str__(self):
214 message = "No terminal matches '%s' in the current parser context, at line %d col %d" % (self.char, self.line, self.column)
215 message += '\n\n' + self._context
216 if self.allowed:
217 message += self._format_expected(self.allowed)
218 if self.token_history:
219 message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in self.token_history)
220 return message
221
222
223class UnexpectedToken(ParseError, UnexpectedInput):
224 #--
225
226 expected: Set[str]
227 considered_rules: Set[str]
228 interactive_parser: 'InteractiveParser'
229
230 def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None):
231 super(UnexpectedToken, self).__init__()
232
233 ##
234
235 self.line = getattr(token, 'line', '?')
236 self.column = getattr(token, 'column', '?')
237 self.pos_in_stream = getattr(token, 'start_pos', None)
238 self.state = state
239
240 self.token = token
241 self.expected = expected ##
242
243 self._accepts = NO_VALUE
244 self.considered_rules = considered_rules
245 self.interactive_parser = interactive_parser
246 self._terminals_by_name = terminals_by_name
247 self.token_history = token_history
248
249
250 @property
251 def accepts(self) -> Set[str]:
252 if self._accepts is NO_VALUE:
253 self._accepts = self.interactive_parser and self.interactive_parser.accepts()
254 return self._accepts
255
256 def __str__(self):
257 message = ("Unexpected token %r at line %s, column %s.\n%s"
258 % (self.token, self.line, self.column, self._format_expected(self.accepts or self.expected)))
259 if self.token_history:
260 message += "Previous tokens: %r\n" % self.token_history
261
262 return message
263
264
265
266class VisitError(LarkError):
267 #--
268
269 obj: 'Union[Tree, Token]'
270 orig_exc: Exception
271
272 def __init__(self, rule, obj, orig_exc):
273 message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc)
274 super(VisitError, self).__init__(message)
275
276 self.rule = rule
277 self.obj = obj
278 self.orig_exc = orig_exc
279
280
281class MissingVariableError(LarkError):
282 pass
283
284
285import logging
286import re
287import sys
288
289logger: logging.Logger = logging.getLogger("lark")
290logger.addHandler(logging.StreamHandler())
291##
292
293##
294
295logger.setLevel(logging.CRITICAL)
296
297
298NO_VALUE = object()
299
300
301def classify(seq, key=None, value=None):
302 d = {}
303 for item in seq:
304 k = key(item) if (key is not None) else item
305 v = value(item) if (value is not None) else item
306 if k in d:
307 d[k].append(v)
308 else:
309 d[k] = [v]
310 return d
311
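# Editor's illustrative sketch: `classify` groups a sequence into a dict of
# lists, optionally projecting keys and values with the given callables.
def _example_classify():
    words = ['apple', 'avocado', 'banana', 'cherry']
    by_first_letter = classify(words, key=lambda w: w[0])
    # -> {'a': ['apple', 'avocado'], 'b': ['banana'], 'c': ['cherry']}
    return by_first_letter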
312
313def _deserialize(data, namespace, memo):
314 if isinstance(data, dict):
315 if '__type__' in data: ##
316
317 class_ = namespace[data['__type__']]
318 return class_.deserialize(data, memo)
319 elif '@' in data:
320 return memo[data['@']]
321 return {key:_deserialize(value, namespace, memo) for key, value in data.items()}
322 elif isinstance(data, list):
323 return [_deserialize(value, namespace, memo) for value in data]
324 return data
325
326
327class Serialize:
328 #--
329
330 def memo_serialize(self, types_to_memoize):
331 memo = SerializeMemoizer(types_to_memoize)
332 return self.serialize(memo), memo.serialize()
333
334 def serialize(self, memo=None):
335 if memo and memo.in_types(self):
336 return {'@': memo.memoized.get(self)}
337
338 fields = getattr(self, '__serialize_fields__')
339 res = {f: _serialize(getattr(self, f), memo) for f in fields}
340 res['__type__'] = type(self).__name__
341 if hasattr(self, '_serialize'):
342 self._serialize(res, memo)
343 return res
344
345 @classmethod
346 def deserialize(cls, data, memo):
347 namespace = getattr(cls, '__serialize_namespace__', [])
348 namespace = {c.__name__:c for c in namespace}
349
350 fields = getattr(cls, '__serialize_fields__')
351
352 if '@' in data:
353 return memo[data['@']]
354
355 inst = cls.__new__(cls)
356 for f in fields:
357 try:
358 setattr(inst, f, _deserialize(data[f], namespace, memo))
359 except KeyError as e:
360 raise KeyError("Cannot find key for class", cls, e)
361
362 if hasattr(inst, '_deserialize'):
363 inst._deserialize()
364
365 return inst
366
367
368class SerializeMemoizer(Serialize):
369 #--
370
371 __serialize_fields__ = 'memoized',
372
373 def __init__(self, types_to_memoize):
374 self.types_to_memoize = tuple(types_to_memoize)
375 self.memoized = Enumerator()
376
377 def in_types(self, value):
378 return isinstance(value, self.types_to_memoize)
379
380 def serialize(self):
381 return _serialize(self.memoized.reversed(), None)
382
383 @classmethod
384 def deserialize(cls, data, namespace, memo):
385 return _deserialize(data, namespace, memo)
386
387
388try:
389 import regex # #
390
391except ImportError:
392 regex = None
393
394import sre_constants
395import sre_parse
396
397categ_pattern = re.compile(r'\\p{[A-Za-z_]+}')
398
399def get_regexp_width(expr):
400 if regex:
401 ##
402
403 ##
404
405 ##
406
407 regexp_final = re.sub(categ_pattern, 'A', expr)
408 else:
409 if re.search(categ_pattern, expr):
410 raise ImportError('`regex` module must be installed in order to use Unicode categories.', expr)
411 regexp_final = expr
412 try:
413 return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
414 except sre_constants.error:
415 if not regex:
416 raise ValueError(expr)
417 else:
418 ##
419
420 ##
421
422 c = regex.compile(regexp_final)
423 if c.match('') is None:
424 ##
425
426 return 1, int(sre_constants.MAXREPEAT)
427 else:
428 return 0, int(sre_constants.MAXREPEAT)
429
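# Editor's illustrative sketch: get_regexp_width reports the (min, max) number
# of characters a regexp can match; the lexer below uses it to reject
# zero-width terminals and to order terminals by maximum width.
def _example_regexp_width():
    assert tuple(get_regexp_width(r'abc')) == (3, 3)   # fixed width
    assert get_regexp_width(r'a+')[0] == 1             # at least one character
    assert get_regexp_width(r'a*')[0] == 0             # may match nothing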
430
431from collections import OrderedDict
432
433
434class Meta:
435
436 empty: bool
437 line: int
438 column: int
439 start_pos: int
440 end_line: int
441 end_column: int
442 end_pos: int
443 orig_expansion: 'List[TerminalDef]'
444 match_tree: bool
445
446 def __init__(self):
447 self.empty = True
448
449
450_Leaf_T = TypeVar("_Leaf_T")
451Branch = Union[_Leaf_T, 'Tree[_Leaf_T]']
452
453
454class Tree(Generic[_Leaf_T]):
455 #--
456
457 data: str
458 children: 'List[Branch[_Leaf_T]]'
459
460 def __init__(self, data: str, children: 'List[Branch[_Leaf_T]]', meta: Optional[Meta]=None) -> None:
461 self.data = data
462 self.children = children
463 self._meta = meta
464
465 @property
466 def meta(self) -> Meta:
467 if self._meta is None:
468 self._meta = Meta()
469 return self._meta
470
471 def __repr__(self):
472 return 'Tree(%r, %r)' % (self.data, self.children)
473
474 def _pretty_label(self):
475 return self.data
476
477 def _pretty(self, level, indent_str):
478 if len(self.children) == 1 and not isinstance(self.children[0], Tree):
479 return [indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n']
480
481 l = [indent_str*level, self._pretty_label(), '\n']
482 for n in self.children:
483 if isinstance(n, Tree):
484 l += n._pretty(level+1, indent_str)
485 else:
486 l += [indent_str*(level+1), '%s' % (n,), '\n']
487
488 return l
489
    def pretty(self, indent_str: str='  ') -> str:
491 #--
492 return ''.join(self._pretty(0, indent_str))
493
494 def __rich__(self, parent:'rich.tree.Tree'=None) -> 'rich.tree.Tree':
495 #--
496 return self._rich(parent)
497
498 def _rich(self, parent):
499 if parent:
500 tree = parent.add(f'[bold]{self.data}[/bold]')
501 else:
502 import rich.tree
503 tree = rich.tree.Tree(self.data)
504
505 for c in self.children:
506 if isinstance(c, Tree):
507 c._rich(tree)
508 else:
509 tree.add(f'[green]{c}[/green]')
510
511 return tree
512
513 def __eq__(self, other):
514 try:
515 return self.data == other.data and self.children == other.children
516 except AttributeError:
517 return False
518
519 def __ne__(self, other):
520 return not (self == other)
521
522 def __hash__(self) -> int:
523 return hash((self.data, tuple(self.children)))
524
525 def iter_subtrees(self) -> 'Iterator[Tree[_Leaf_T]]':
526 #--
527 queue = [self]
528 subtrees = OrderedDict()
529 for subtree in queue:
530 subtrees[id(subtree)] = subtree
531 ##
532
533 queue += [c for c in reversed(subtree.children) ##
534
535 if isinstance(c, Tree) and id(c) not in subtrees]
536
537 del queue
538 return reversed(list(subtrees.values()))
539
540 def find_pred(self, pred: 'Callable[[Tree[_Leaf_T]], bool]') -> 'Iterator[Tree[_Leaf_T]]':
541 #--
542 return filter(pred, self.iter_subtrees())
543
    def find_data(self, data: str) -> 'Iterator[Tree[_Leaf_T]]':
        #--
        return self.find_pred(lambda t: t.data == data)

    def iter_subtrees_topdown(self) -> 'Iterator[Tree[_Leaf_T]]':
        ## Depth-first iteration, parents before children.
        ## Needed by Visitor.visit_topdown further below.
        stack = [self]
        while stack:
            node = stack.pop()
            if isinstance(node, Tree):
                yield node
                stack.extend(reversed(node.children))
547
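# Editor's illustrative sketch: constructing and inspecting a Tree by hand.
# In normal use, trees are produced by the parser rather than built manually.
def _example_tree_usage():
    tree = Tree('start', [Tree('pair', ['a', '1']), Tree('pair', ['b', '2'])])
    bottom_up = [t.data for t in tree.iter_subtrees()]   # children before the root
    pairs = list(tree.find_data('pair'))                 # both 'pair' subtrees
    return tree.pretty(), bottom_up, pairs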
548
549from functools import update_wrapper, wraps
550from inspect import getmembers, getmro
551
552_Return_T = TypeVar('_Return_T')
553_Return_V = TypeVar('_Return_V')
554_Leaf_T = TypeVar('_Leaf_T')
555_Leaf_U = TypeVar('_Leaf_U')
556_R = TypeVar('_R')
557_FUNC = Callable[..., _Return_T]
558_DECORATED = Union[_FUNC, type]
559
560class _DiscardType:
561 #--
562
563 def __repr__(self):
564 return "lark.visitors.Discard"
565
566Discard = _DiscardType()
567
568##
569
570
571class _Decoratable:
572 #--
573
574 @classmethod
575 def _apply_v_args(cls, visit_wrapper):
576 mro = getmro(cls)
577 assert mro[0] is cls
578 libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)}
579 for name, value in getmembers(cls):
580
581 ##
582
583 if name.startswith('_') or (name in libmembers and name not in cls.__dict__):
584 continue
585 if not callable(value):
586 continue
587
588 ##
589
590 if isinstance(cls.__dict__[name], _VArgsWrapper):
591 continue
592
593 setattr(cls, name, _VArgsWrapper(cls.__dict__[name], visit_wrapper))
594 return cls
595
596 def __class_getitem__(cls, _):
597 return cls
598
599
600class Transformer(_Decoratable, ABC, Generic[_Leaf_T, _Return_T]):
601 #--
602 __visit_tokens__ = True ##
603
604
605 def __init__(self, visit_tokens: bool=True) -> None:
606 self.__visit_tokens__ = visit_tokens
607
608 def _call_userfunc(self, tree, new_children=None):
609 ##
610
611 children = new_children if new_children is not None else tree.children
612 try:
613 f = getattr(self, tree.data)
614 except AttributeError:
615 return self.__default__(tree.data, children, tree.meta)
616 else:
617 try:
618 wrapper = getattr(f, 'visit_wrapper', None)
619 if wrapper is not None:
620 return f.visit_wrapper(f, tree.data, children, tree.meta)
621 else:
622 return f(children)
623 except GrammarError:
624 raise
625 except Exception as e:
626 raise VisitError(tree.data, tree, e)
627
628 def _call_userfunc_token(self, token):
629 try:
630 f = getattr(self, token.type)
631 except AttributeError:
632 return self.__default_token__(token)
633 else:
634 try:
635 return f(token)
636 except GrammarError:
637 raise
638 except Exception as e:
639 raise VisitError(token.type, token, e)
640
641 def _transform_children(self, children):
642 for c in children:
643 if isinstance(c, Tree):
644 res = self._transform_tree(c)
645 elif self.__visit_tokens__ and isinstance(c, Token):
646 res = self._call_userfunc_token(c)
647 else:
648 res = c
649
650 if res is not Discard:
651 yield res
652
653 def _transform_tree(self, tree):
654 children = list(self._transform_children(tree.children))
655 return self._call_userfunc(tree, children)
656
657 def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
658 #--
659 return self._transform_tree(tree)
660
661 def __mul__(
662 self: 'Transformer[_Leaf_T, Tree[_Leaf_U]]',
663 other: 'Union[Transformer[_Leaf_U, _Return_V], TransformerChain[_Leaf_U, _Return_V,]]'
664 ) -> 'TransformerChain[_Leaf_T, _Return_V]':
665 #--
666 return TransformerChain(self, other)
667
668 def __default__(self, data, children, meta):
669 #--
670 return Tree(data, children, meta)
671
672 def __default_token__(self, token):
673 #--
674 return token
675
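# Editor's illustrative sketch: a Transformer that folds a small arithmetic
# tree bottom-up. The rule names ('add', 'number') are hypothetical and must
# match the grammar that produced the tree.
class _ExampleCalc(Transformer):
    def number(self, children):
        (value,) = children
        return int(value)

    def add(self, children):
        left, right = children
        return left + right

# e.g. _ExampleCalc().transform(Tree('add', [Tree('number', ['1']),
#                                            Tree('number', ['2'])])) == 3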
676
677def merge_transformers(base_transformer=None, **transformers_to_merge):
678 #--
679 if base_transformer is None:
680 base_transformer = Transformer()
681 for prefix, transformer in transformers_to_merge.items():
682 for method_name in dir(transformer):
683 method = getattr(transformer, method_name)
684 if not callable(method):
685 continue
686 if method_name.startswith("_") or method_name == "transform":
687 continue
688 prefixed_method = prefix + "__" + method_name
689 if hasattr(base_transformer, prefixed_method):
690 raise AttributeError("Cannot merge: method '%s' appears more than once" % prefixed_method)
691
692 setattr(base_transformer, prefixed_method, method)
693
694 return base_transformer
695
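# Editor's illustrative sketch: merge_transformers copies the public methods
# of each keyword-argument transformer onto the base transformer, prefixed
# with '<keyword>__', which matches rules imported from other grammars.
def _example_merge_transformers():
    class _Main(Transformer):
        def start(self, children):
            return children

    class _Json(Transformer):
        def pair(self, children):
            return tuple(children)

    # The returned transformer handles 'start' and 'json__pair'.
    return merge_transformers(_Main(), json=_Json())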
696
class InlineTransformer(Transformer):   ## Deprecated: prefer @v_args(inline=True) on a regular Transformer
698
699 def _call_userfunc(self, tree, new_children=None):
700 ##
701
702 children = new_children if new_children is not None else tree.children
703 try:
704 f = getattr(self, tree.data)
705 except AttributeError:
706 return self.__default__(tree.data, children, tree.meta)
707 else:
708 return f(*children)
709
710
711class TransformerChain(Generic[_Leaf_T, _Return_T]):
712
713 transformers: 'Tuple[Union[Transformer, TransformerChain], ...]'
714
715 def __init__(self, *transformers: 'Union[Transformer, TransformerChain]') -> None:
716 self.transformers = transformers
717
718 def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
719 for t in self.transformers:
720 tree = t.transform(tree)
721 return cast(_Return_T, tree)
722
723 def __mul__(
724 self: 'TransformerChain[_Leaf_T, Tree[_Leaf_U]]',
725 other: 'Union[Transformer[_Leaf_U, _Return_V], TransformerChain[_Leaf_U, _Return_V]]'
726 ) -> 'TransformerChain[_Leaf_T, _Return_V]':
727 return TransformerChain(*self.transformers + (other,))
728
729
730class Transformer_InPlace(Transformer):
731 #--
732 def _transform_tree(self, tree): ##
733
734 return self._call_userfunc(tree)
735
736 def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
737 for subtree in tree.iter_subtrees():
738 subtree.children = list(self._transform_children(subtree.children))
739
740 return self._transform_tree(tree)
741
742
743class Transformer_NonRecursive(Transformer):
744 #--
745
746 def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
747 ##
748
749 rev_postfix = []
750 q: List[Branch[_Leaf_T]] = [tree]
751 while q:
752 t = q.pop()
753 rev_postfix.append(t)
754 if isinstance(t, Tree):
755 q += t.children
756
757 ##
758
759 stack: List = []
760 for x in reversed(rev_postfix):
761 if isinstance(x, Tree):
762 size = len(x.children)
763 if size:
764 args = stack[-size:]
765 del stack[-size:]
766 else:
767 args = []
768
769 res = self._call_userfunc(x, args)
770 if res is not Discard:
771 stack.append(res)
772
773 elif self.__visit_tokens__ and isinstance(x, Token):
774 res = self._call_userfunc_token(x)
775 if res is not Discard:
776 stack.append(res)
777 else:
778 stack.append(x)
779
        result, = stack   ## by this point the stack should hold exactly one completed tree
781
782 ##
783
784 ##
785
786 ##
787
788 return cast(_Return_T, result)
789
790
791class Transformer_InPlaceRecursive(Transformer):
792 #--
793 def _transform_tree(self, tree):
794 tree.children = list(self._transform_children(tree.children))
795 return self._call_userfunc(tree)
796
797
798##
799
800
801class VisitorBase:
802 def _call_userfunc(self, tree):
803 return getattr(self, tree.data, self.__default__)(tree)
804
805 def __default__(self, tree):
806 #--
807 return tree
808
809 def __class_getitem__(cls, _):
810 return cls
811
812
813class Visitor(VisitorBase, ABC, Generic[_Leaf_T]):
814 #--
815
816 def visit(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
817 #--
818 for subtree in tree.iter_subtrees():
819 self._call_userfunc(subtree)
820 return tree
821
822 def visit_topdown(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
823 #--
824 for subtree in tree.iter_subtrees_topdown():
825 self._call_userfunc(subtree)
826 return tree
827
828
829class Visitor_Recursive(VisitorBase, Generic[_Leaf_T]):
830 #--
831
832 def visit(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
833 #--
834 for child in tree.children:
835 if isinstance(child, Tree):
836 self.visit(child)
837
838 self._call_userfunc(tree)
839 return tree
840
841 def visit_topdown(self,tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
842 #--
843 self._call_userfunc(tree)
844
845 for child in tree.children:
846 if isinstance(child, Tree):
847 self.visit_topdown(child)
848
849 return tree
850
851
852class Interpreter(_Decoratable, ABC, Generic[_Leaf_T, _Return_T]):
853 #--
854
855 def visit(self, tree: Tree[_Leaf_T]) -> _Return_T:
856 ##
857
858 ##
859
860 ##
861
862 return self._visit_tree(tree)
863
864 def _visit_tree(self, tree: Tree[_Leaf_T]):
865 f = getattr(self, tree.data)
866 wrapper = getattr(f, 'visit_wrapper', None)
867 if wrapper is not None:
868 return f.visit_wrapper(f, tree.data, tree.children, tree.meta)
869 else:
870 return f(tree)
871
872 def visit_children(self, tree: Tree[_Leaf_T]) -> List:
873 return [self._visit_tree(child) if isinstance(child, Tree) else child
874 for child in tree.children]
875
876 def __getattr__(self, name):
877 return self.__default__
878
879 def __default__(self, tree):
880 return self.visit_children(tree)
881
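# Editor's illustrative sketch: unlike Transformer, an Interpreter starts at
# the root and only descends when a visitor method asks for the children.
# The rule names ('start', 'number') are hypothetical.
class _ExampleInterpreter(Interpreter):
    def start(self, tree):
        return sum(self.visit_children(tree))

    def number(self, tree):
        (value,) = tree.children
        return int(value)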
882
883_InterMethod = Callable[[Type[Interpreter], _Return_T], _R]
884
885def visit_children_decor(func: _InterMethod) -> _InterMethod:
886 #--
887 @wraps(func)
888 def inner(cls, tree):
889 values = cls.visit_children(tree)
890 return func(cls, values)
891 return inner
892
893##
894
895
896def _apply_v_args(obj, visit_wrapper):
897 try:
898 _apply = obj._apply_v_args
899 except AttributeError:
900 return _VArgsWrapper(obj, visit_wrapper)
901 else:
902 return _apply(visit_wrapper)
903
904
905class _VArgsWrapper:
906 #--
907 base_func: Callable
908
909 def __init__(self, func: Callable, visit_wrapper: Callable[[Callable, str, list, Any], Any]):
910 if isinstance(func, _VArgsWrapper):
911 func = func.base_func
912 ##
913
914 self.base_func = func ##
915
916 self.visit_wrapper = visit_wrapper
917 update_wrapper(self, func)
918
919 def __call__(self, *args, **kwargs):
920 return self.base_func(*args, **kwargs)
921
922 def __get__(self, instance, owner=None):
923 try:
924 g = self.base_func.__get__
925 except AttributeError:
926 return self
927 else:
928 return _VArgsWrapper(g(instance, owner), self.visit_wrapper)
929
930 def __set_name__(self, owner, name):
931 try:
932 f = self.base_func.__set_name__
933 except AttributeError:
934 return
935 else:
936 f(owner, name)
937
938
939def _vargs_inline(f, _data, children, _meta):
940 return f(*children)
941def _vargs_meta_inline(f, _data, children, meta):
942 return f(meta, *children)
943def _vargs_meta(f, _data, children, meta):
944 return f(meta, children)
945def _vargs_tree(f, data, children, meta):
946 return f(Tree(data, children, meta))
947
948
949def v_args(inline: bool = False, meta: bool = False, tree: bool = False, wrapper: Optional[Callable] = None) -> Callable[[_DECORATED], _DECORATED]:
950 #--
951 if tree and (meta or inline):
952 raise ValueError("Visitor functions cannot combine 'tree' with 'meta' or 'inline'.")
953
954 func = None
955 if meta:
956 if inline:
957 func = _vargs_meta_inline
958 else:
959 func = _vargs_meta
960 elif inline:
961 func = _vargs_inline
962 elif tree:
963 func = _vargs_tree
964
965 if wrapper is not None:
966 if func is not None:
967 raise ValueError("Cannot use 'wrapper' along with 'tree', 'meta' or 'inline'.")
968 func = wrapper
969
970 def _visitor_args_dec(obj):
971 return _apply_v_args(obj, func)
972 return _visitor_args_dec
973
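# Editor's illustrative sketch: v_args(inline=True) unpacks a rule's children
# into positional arguments instead of passing a single list.
@v_args(inline=True)
class _ExampleInlineCalc(Transformer):
    def number(self, token):
        return int(token)

    def add(self, left, right):
        return left + right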
974
975
976TOKEN_DEFAULT_PRIORITY = 0
977
978
979class Symbol(Serialize):
980 __slots__ = ('name',)
981
982 name: str
983 is_term: ClassVar[bool] = NotImplemented
984
985 def __init__(self, name: str) -> None:
986 self.name = name
987
988 def __eq__(self, other):
989 assert isinstance(other, Symbol), other
990 return self.is_term == other.is_term and self.name == other.name
991
992 def __ne__(self, other):
993 return not (self == other)
994
995 def __hash__(self):
996 return hash(self.name)
997
998 def __repr__(self):
999 return '%s(%r)' % (type(self).__name__, self.name)
1000
1001 fullrepr = property(__repr__)
1002
1003 def renamed(self, f):
1004 return type(self)(f(self.name))
1005
1006
1007class Terminal(Symbol):
1008 __serialize_fields__ = 'name', 'filter_out'
1009
1010 is_term: ClassVar[bool] = True
1011
1012 def __init__(self, name, filter_out=False):
1013 self.name = name
1014 self.filter_out = filter_out
1015
1016 @property
1017 def fullrepr(self):
1018 return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out)
1019
1020 def renamed(self, f):
1021 return type(self)(f(self.name), self.filter_out)
1022
1023
1024class NonTerminal(Symbol):
1025 __serialize_fields__ = 'name',
1026
1027 is_term: ClassVar[bool] = False
1028
1029
1030class RuleOptions(Serialize):
1031 __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices'
1032
1033 keep_all_tokens: bool
1034 expand1: bool
1035 priority: Optional[int]
1036 template_source: Optional[str]
1037 empty_indices: Tuple[bool, ...]
1038
1039 def __init__(self, keep_all_tokens: bool=False, expand1: bool=False, priority: Optional[int]=None, template_source: Optional[str]=None, empty_indices: Tuple[bool, ...]=()) -> None:
1040 self.keep_all_tokens = keep_all_tokens
1041 self.expand1 = expand1
1042 self.priority = priority
1043 self.template_source = template_source
1044 self.empty_indices = empty_indices
1045
1046 def __repr__(self):
1047 return 'RuleOptions(%r, %r, %r, %r)' % (
1048 self.keep_all_tokens,
1049 self.expand1,
1050 self.priority,
1051 self.template_source
1052 )
1053
1054
1055class Rule(Serialize):
1056 #--
1057 __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')
1058
1059 __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
1060 __serialize_namespace__ = Terminal, NonTerminal, RuleOptions
1061
1062 def __init__(self, origin, expansion, order=0, alias=None, options=None):
1063 self.origin = origin
1064 self.expansion = expansion
1065 self.alias = alias
1066 self.order = order
1067 self.options = options or RuleOptions()
1068 self._hash = hash((self.origin, tuple(self.expansion)))
1069
1070 def _deserialize(self):
1071 self._hash = hash((self.origin, tuple(self.expansion)))
1072
1073 def __str__(self):
1074 return '<%s : %s>' % (self.origin.name, ' '.join(x.name for x in self.expansion))
1075
1076 def __repr__(self):
1077 return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options)
1078
1079 def __hash__(self):
1080 return self._hash
1081
1082 def __eq__(self, other):
1083 if not isinstance(other, Rule):
1084 return False
1085 return self.origin == other.origin and self.expansion == other.expansion
1086
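# Editor's illustrative sketch: the internal representation of a grammar rule
# such as 'sum: sum PLUS product'. The symbol names are hypothetical.
def _example_rule():
    rule = Rule(NonTerminal('sum'),
                [NonTerminal('sum'), Terminal('PLUS', filter_out=True), NonTerminal('product')],
                options=RuleOptions(priority=1))
    return str(rule)   # '<sum : sum PLUS product>'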
1087
1088
from contextlib import suppress   # used by BasicLexer.lex below
from copy import copy, deepcopy   # deepcopy is used by LexerConf and ParserState below
1090
1091
1092class Pattern(Serialize, ABC):
1093
1094 value: str
1095 flags: Collection[str]
1096 raw: Optional[str]
1097 type: ClassVar[str]
1098
1099 def __init__(self, value: str, flags: Collection[str]=(), raw: Optional[str]=None) -> None:
1100 self.value = value
1101 self.flags = frozenset(flags)
1102 self.raw = raw
1103
1104 def __repr__(self):
1105 return repr(self.to_regexp())
1106
1107 ##
1108
1109 def __hash__(self):
1110 return hash((type(self), self.value, self.flags))
1111
1112 def __eq__(self, other):
1113 return type(self) == type(other) and self.value == other.value and self.flags == other.flags
1114
1115 @abstractmethod
1116 def to_regexp(self) -> str:
1117 raise NotImplementedError()
1118
1119 @property
1120 @abstractmethod
1121 def min_width(self) -> int:
1122 raise NotImplementedError()
1123
1124 @property
1125 @abstractmethod
1126 def max_width(self) -> int:
1127 raise NotImplementedError()
1128
1129 def _get_flags(self, value):
1130 for f in self.flags:
1131 value = ('(?%s:%s)' % (f, value))
1132 return value
1133
1134
1135class PatternStr(Pattern):
1136 __serialize_fields__ = 'value', 'flags'
1137
1138 type: ClassVar[str] = "str"
1139
1140 def to_regexp(self) -> str:
1141 return self._get_flags(re.escape(self.value))
1142
1143 @property
1144 def min_width(self) -> int:
1145 return len(self.value)
1146
1147 @property
1148 def max_width(self) -> int:
1149 return len(self.value)
1150
1151
1152class PatternRE(Pattern):
1153 __serialize_fields__ = 'value', 'flags', '_width'
1154
1155 type: ClassVar[str] = "re"
1156
1157 def to_regexp(self) -> str:
1158 return self._get_flags(self.value)
1159
1160 _width = None
1161 def _get_width(self):
1162 if self._width is None:
1163 self._width = get_regexp_width(self.to_regexp())
1164 return self._width
1165
1166 @property
1167 def min_width(self) -> int:
1168 return self._get_width()[0]
1169
1170 @property
1171 def max_width(self) -> int:
1172 return self._get_width()[1]
1173
1174
1175class TerminalDef(Serialize):
1176 __serialize_fields__ = 'name', 'pattern', 'priority'
1177 __serialize_namespace__ = PatternStr, PatternRE
1178
1179 name: str
1180 pattern: Pattern
1181 priority: int
1182
1183 def __init__(self, name: str, pattern: Pattern, priority: int=TOKEN_DEFAULT_PRIORITY) -> None:
1184 assert isinstance(pattern, Pattern), pattern
1185 self.name = name
1186 self.pattern = pattern
1187 self.priority = priority
1188
1189 def __repr__(self):
1190 return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)
1191
1192 def user_repr(self) -> str:
1193 if self.name.startswith('__'): ##
1194
1195 return self.pattern.raw or self.name
1196 else:
1197 return self.name
1198
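# Editor's illustrative sketch: terminal definitions pair a name with a
# pattern; patterns render themselves as regexps and report their width.
def _example_terminal_defs():
    name = TerminalDef('NAME', PatternRE(r'[a-zA-Z_]\w*'))
    plus = TerminalDef('PLUS', PatternStr('+'), priority=1)
    assert name.pattern.min_width == 1
    assert plus.pattern.to_regexp() == re.escape('+')
    return name, plus
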
1199_T = TypeVar('_T', bound="Token")
1200
1201class Token(str):
1202 #--
1203 __slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')
1204
1205 type: str
1206 start_pos: int
1207 value: Any
1208 line: int
1209 column: int
1210 end_line: int
1211 end_column: int
1212 end_pos: int
1213
1214 def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None):
1215 inst = super(Token, cls).__new__(cls, value)
1216 inst.type = type_
1217 inst.start_pos = start_pos
1218 inst.value = value
1219 inst.line = line
1220 inst.column = column
1221 inst.end_line = end_line
1222 inst.end_column = end_column
1223 inst.end_pos = end_pos
1224 return inst
1225
1226 def update(self, type_: Optional[str]=None, value: Optional[Any]=None) -> 'Token':
1227 return Token.new_borrow_pos(
1228 type_ if type_ is not None else self.type,
1229 value if value is not None else self.value,
1230 self
1231 )
1232
1233 @classmethod
1234 def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: 'Token') -> _T:
1235 return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)
1236
1237 def __reduce__(self):
1238 return (self.__class__, (self.type, self.value, self.start_pos, self.line, self.column))
1239
1240 def __repr__(self):
1241 return 'Token(%r, %r)' % (self.type, self.value)
1242
1243 def __deepcopy__(self, memo):
1244 return Token(self.type, self.value, self.start_pos, self.line, self.column)
1245
1246 def __eq__(self, other):
1247 if isinstance(other, Token) and self.type != other.type:
1248 return False
1249
1250 return str.__eq__(self, other)
1251
1252 __hash__ = str.__hash__
1253
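# Editor's illustrative sketch: a Token is a str subclass that also carries
# its terminal type and source position.
def _example_token():
    tok = Token('NAME', 'foo', start_pos=0, line=1, column=1)
    assert tok == 'foo' and tok.type == 'NAME'
    renamed = tok.update(type_='IDENT')   # same value and position, new type
    return renamed.type, renamed.line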
1254
1255class LineCounter:
1256 __slots__ = 'char_pos', 'line', 'column', 'line_start_pos', 'newline_char'
1257
1258 def __init__(self, newline_char):
1259 self.newline_char = newline_char
1260 self.char_pos = 0
1261 self.line = 1
1262 self.column = 1
1263 self.line_start_pos = 0
1264
1265 def __eq__(self, other):
1266 if not isinstance(other, LineCounter):
1267 return NotImplemented
1268
1269 return self.char_pos == other.char_pos and self.newline_char == other.newline_char
1270
1271 def feed(self, token: Token, test_newline=True):
1272 #--
1273 if test_newline:
1274 newlines = token.count(self.newline_char)
1275 if newlines:
1276 self.line += newlines
1277 self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1
1278
1279 self.char_pos += len(token)
1280 self.column = self.char_pos - self.line_start_pos + 1
1281
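# Editor's illustrative sketch: LineCounter tracks absolute position, line and
# column as text is fed to it; the lexer uses it to stamp token positions.
def _example_line_counter():
    lc = LineCounter('\n')
    lc.feed('hello\nwo')
    assert (lc.line, lc.column, lc.char_pos) == (2, 3, 8)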
1282
1283class UnlessCallback:
1284 def __init__(self, scanner):
1285 self.scanner = scanner
1286
1287 def __call__(self, t):
1288 res = self.scanner.match(t.value, 0)
1289 if res:
1290 _value, t.type = res
1291 return t
1292
1293
1294class CallChain:
1295 def __init__(self, callback1, callback2, cond):
1296 self.callback1 = callback1
1297 self.callback2 = callback2
1298 self.cond = cond
1299
1300 def __call__(self, t):
1301 t2 = self.callback1(t)
1302 return self.callback2(t) if self.cond(t2) else t2
1303
1304
1305def _get_match(re_, regexp, s, flags):
1306 m = re_.match(regexp, s, flags)
1307 if m:
1308 return m.group(0)
1309
1310def _create_unless(terminals, g_regex_flags, re_, use_bytes):
1311 tokens_by_type = classify(terminals, lambda t: type(t.pattern))
1312 assert len(tokens_by_type) <= 2, tokens_by_type.keys()
1313 embedded_strs = set()
1314 callback = {}
1315 for retok in tokens_by_type.get(PatternRE, []):
1316 unless = []
1317 for strtok in tokens_by_type.get(PatternStr, []):
1318 if strtok.priority != retok.priority:
1319 continue
1320 s = strtok.pattern.value
1321 if s == _get_match(re_, retok.pattern.to_regexp(), s, g_regex_flags):
1322 unless.append(strtok)
1323 if strtok.pattern.flags <= retok.pattern.flags:
1324 embedded_strs.add(strtok)
1325 if unless:
1326 callback[retok.name] = UnlessCallback(Scanner(unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes))
1327
1328 new_terminals = [t for t in terminals if t not in embedded_strs]
1329 return new_terminals, callback
1330
1331
1332class Scanner:
1333 def __init__(self, terminals, g_regex_flags, re_, use_bytes, match_whole=False):
1334 self.terminals = terminals
1335 self.g_regex_flags = g_regex_flags
1336 self.re_ = re_
1337 self.use_bytes = use_bytes
1338 self.match_whole = match_whole
1339
1340 self.allowed_types = {t.name for t in self.terminals}
1341
1342 self._mres = self._build_mres(terminals, len(terminals))
1343
1344 def _build_mres(self, terminals, max_size):
1345 ##
1346
1347 ##
1348
1349 ##
1350
1351 postfix = '$' if self.match_whole else ''
1352 mres = []
1353 while terminals:
1354 pattern = u'|'.join(u'(?P<%s>%s)' % (t.name, t.pattern.to_regexp() + postfix) for t in terminals[:max_size])
1355 if self.use_bytes:
1356 pattern = pattern.encode('latin-1')
1357 try:
1358 mre = self.re_.compile(pattern, self.g_regex_flags)
1359 except AssertionError: ##
1360
1361 return self._build_mres(terminals, max_size//2)
1362
1363 mres.append((mre, {i: n for n, i in mre.groupindex.items()}))
1364 terminals = terminals[max_size:]
1365 return mres
1366
1367 def match(self, text, pos):
1368 for mre, type_from_index in self._mres:
1369 m = mre.match(text, pos)
1370 if m:
1371 return m.group(0), type_from_index[m.lastindex]
1372
1373
1374def _regexp_has_newline(r: str):
1375 #--
1376 return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)
1377
1378
1379class LexerState:
1380 #--
1381
1382 __slots__ = 'text', 'line_ctr', 'last_token'
1383
1384 def __init__(self, text, line_ctr=None, last_token=None):
1385 self.text = text
1386 self.line_ctr = line_ctr or LineCounter(b'\n' if isinstance(text, bytes) else '\n')
1387 self.last_token = last_token
1388
1389 def __eq__(self, other):
1390 if not isinstance(other, LexerState):
1391 return NotImplemented
1392
1393 return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token
1394
1395 def __copy__(self):
1396 return type(self)(self.text, copy(self.line_ctr), self.last_token)
1397
1398
1399class LexerThread:
1400 #--
1401
1402 def __init__(self, lexer: 'Lexer', lexer_state: LexerState):
1403 self.lexer = lexer
1404 self.state = lexer_state
1405
1406 @classmethod
1407 def from_text(cls, lexer: 'Lexer', text: str):
1408 return cls(lexer, LexerState(text))
1409
1410 def lex(self, parser_state):
1411 return self.lexer.lex(self.state, parser_state)
1412
1413 def __copy__(self):
1414 return type(self)(self.lexer, copy(self.state))
1415
1416 _Token = Token
1417
1418
1419_Callback = Callable[[Token], Token]
1420
1421class Lexer(ABC):
1422 #--
1423 @abstractmethod
1424 def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
1425 return NotImplemented
1426
1427 def make_lexer_state(self, text):
1428 #--
1429 return LexerState(text)
1430
1431
1432class BasicLexer(Lexer):
1433
1434 terminals: Collection[TerminalDef]
1435 ignore_types: FrozenSet[str]
1436 newline_types: FrozenSet[str]
1437 user_callbacks: Dict[str, _Callback]
1438 callback: Dict[str, _Callback]
1439 re: ModuleType
1440
1441 def __init__(self, conf: 'LexerConf') -> None:
1442 terminals = list(conf.terminals)
1443 assert all(isinstance(t, TerminalDef) for t in terminals), terminals
1444
1445 self.re = conf.re_module
1446
1447 if not conf.skip_validation:
1448 ##
1449
1450 for t in terminals:
1451 try:
1452 self.re.compile(t.pattern.to_regexp(), conf.g_regex_flags)
1453 except self.re.error:
1454 raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern))
1455
1456 if t.pattern.min_width == 0:
1457 raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern))
1458
1459 if not (set(conf.ignore) <= {t.name for t in terminals}):
1460 raise LexError("Ignore terminals are not defined: %s" % (set(conf.ignore) - {t.name for t in terminals}))
1461
1462 ##
1463
1464 self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp()))
1465 self.ignore_types = frozenset(conf.ignore)
1466
1467 terminals.sort(key=lambda x: (-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name))
1468 self.terminals = terminals
1469 self.user_callbacks = conf.callbacks
1470 self.g_regex_flags = conf.g_regex_flags
1471 self.use_bytes = conf.use_bytes
1472 self.terminals_by_name = conf.terminals_by_name
1473
1474 self._scanner = None
1475
1476 def _build_scanner(self):
1477 terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes)
1478 assert all(self.callback.values())
1479
1480 for type_, f in self.user_callbacks.items():
1481 if type_ in self.callback:
1482 ##
1483
1484 self.callback[type_] = CallChain(self.callback[type_], f, lambda t: t.type == type_)
1485 else:
1486 self.callback[type_] = f
1487
1488 self._scanner = Scanner(terminals, self.g_regex_flags, self.re, self.use_bytes)
1489
1490 @property
1491 def scanner(self):
1492 if self._scanner is None:
1493 self._build_scanner()
1494 return self._scanner
1495
1496 def match(self, text, pos):
1497 return self.scanner.match(text, pos)
1498
1499 def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]:
1500 with suppress(EOFError):
1501 while True:
1502 yield self.next_token(state, parser_state)
1503
1504 def next_token(self, lex_state: LexerState, parser_state: Any=None) -> Token:
1505 line_ctr = lex_state.line_ctr
1506 while line_ctr.char_pos < len(lex_state.text):
1507 res = self.match(lex_state.text, line_ctr.char_pos)
1508 if not res:
1509 allowed = self.scanner.allowed_types - self.ignore_types
1510 if not allowed:
1511 allowed = {"<END-OF-FILE>"}
1512 raise UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column,
1513 allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token],
1514 state=parser_state, terminals_by_name=self.terminals_by_name)
1515
1516 value, type_ = res
1517
1518 if type_ not in self.ignore_types:
1519 t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
1520 line_ctr.feed(value, type_ in self.newline_types)
1521 t.end_line = line_ctr.line
1522 t.end_column = line_ctr.column
1523 t.end_pos = line_ctr.char_pos
1524 if t.type in self.callback:
1525 t = self.callback[t.type](t)
1526 if not isinstance(t, Token):
1527 raise LexError("Callbacks must return a token (returned %r)" % t)
1528 lex_state.last_token = t
1529 return t
1530 else:
1531 if type_ in self.callback:
1532 t2 = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
1533 self.callback[type_](t2)
1534 line_ctr.feed(value, type_ in self.newline_types)
1535
1536 ##
1537
1538 raise EOFError(self)
1539
1540
1541class ContextualLexer(Lexer):
1542
1543 lexers: Dict[str, BasicLexer]
1544 root_lexer: BasicLexer
1545
1546 def __init__(self, conf: 'LexerConf', states: Dict[str, Collection[str]], always_accept: Collection[str]=()) -> None:
1547 terminals = list(conf.terminals)
1548 terminals_by_name = conf.terminals_by_name
1549
1550 trad_conf = copy(conf)
1551 trad_conf.terminals = terminals
1552
1553 lexer_by_tokens: Dict[FrozenSet[str], BasicLexer] = {}
1554 self.lexers = {}
1555 for state, accepts in states.items():
1556 key = frozenset(accepts)
1557 try:
1558 lexer = lexer_by_tokens[key]
1559 except KeyError:
1560 accepts = set(accepts) | set(conf.ignore) | set(always_accept)
1561 lexer_conf = copy(trad_conf)
1562 lexer_conf.terminals = [terminals_by_name[n] for n in accepts if n in terminals_by_name]
1563 lexer = BasicLexer(lexer_conf)
1564 lexer_by_tokens[key] = lexer
1565
1566 self.lexers[state] = lexer
1567
1568 assert trad_conf.terminals is terminals
1569 self.root_lexer = BasicLexer(trad_conf)
1570
1571 def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
1572 try:
1573 while True:
1574 lexer = self.lexers[parser_state.position]
1575 yield lexer.next_token(lexer_state, parser_state)
1576 except EOFError:
1577 pass
1578 except UnexpectedCharacters as e:
1579 ##
1580
1581 ##
1582
1583 try:
1584 last_token = lexer_state.last_token ##
1585
1586 token = self.root_lexer.next_token(lexer_state, parser_state)
1587 raise UnexpectedToken(token, e.allowed, state=parser_state, token_history=[last_token], terminals_by_name=self.root_lexer.terminals_by_name)
1588 except UnexpectedCharacters:
1589 raise e ##
1590
1591
1592
1593
1594_ParserArgType: 'TypeAlias' = 'Literal["earley", "lalr", "cyk", "auto"]'
1595_LexerArgType: 'TypeAlias' = 'Union[Literal["auto", "basic", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]]'
1596_Callback = Callable[[Token], Token]
1597
1598class LexerConf(Serialize):
1599 __serialize_fields__ = 'terminals', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type'
1600 __serialize_namespace__ = TerminalDef,
1601
1602 terminals: Collection[TerminalDef]
1603 re_module: ModuleType
1604 ignore: Collection[str]
1605 postlex: 'Optional[PostLex]'
1606 callbacks: Dict[str, _Callback]
1607 g_regex_flags: int
1608 skip_validation: bool
1609 use_bytes: bool
1610 lexer_type: Optional[_LexerArgType]
1611
1612 def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False):
1613 self.terminals = terminals
1614 self.terminals_by_name = {t.name: t for t in self.terminals}
1615 assert len(self.terminals) == len(self.terminals_by_name)
1616 self.ignore = ignore
1617 self.postlex = postlex
1618 self.callbacks = callbacks or {}
1619 self.g_regex_flags = g_regex_flags
1620 self.re_module = re_module
1621 self.skip_validation = skip_validation
1622 self.use_bytes = use_bytes
1623 self.lexer_type = None
1624
1625 def _deserialize(self):
1626 self.terminals_by_name = {t.name: t for t in self.terminals}
1627
1628 def __deepcopy__(self, memo=None):
1629 return type(self)(
1630 deepcopy(self.terminals, memo),
1631 self.re_module,
1632 deepcopy(self.ignore, memo),
1633 deepcopy(self.postlex, memo),
1634 deepcopy(self.callbacks, memo),
1635 deepcopy(self.g_regex_flags, memo),
1636 deepcopy(self.skip_validation, memo),
1637 deepcopy(self.use_bytes, memo),
1638 )
1639
1640
1641class ParserConf(Serialize):
1642 __serialize_fields__ = 'rules', 'start', 'parser_type'
1643
1644 def __init__(self, rules, callbacks, start):
1645 assert isinstance(start, list)
1646 self.rules = rules
1647 self.callbacks = callbacks
1648 self.start = start
1649
1650 self.parser_type = None
1651
1652
1653from functools import partial, wraps
1654from itertools import product, repeat
1655
1656
1657class ExpandSingleChild:
1658 def __init__(self, node_builder):
1659 self.node_builder = node_builder
1660
1661 def __call__(self, children):
1662 if len(children) == 1:
1663 return children[0]
1664 else:
1665 return self.node_builder(children)
1666
1667
1668
1669class PropagatePositions:
1670 def __init__(self, node_builder, node_filter=None):
1671 self.node_builder = node_builder
1672 self.node_filter = node_filter
1673
1674 def __call__(self, children):
1675 res = self.node_builder(children)
1676
1677 if isinstance(res, Tree):
1678 ##
1679
1680 ##
1681
1682 ##
1683
1684 ##
1685
1686
1687 res_meta = res.meta
1688
1689 first_meta = self._pp_get_meta(children)
1690 if first_meta is not None:
1691 if not hasattr(res_meta, 'line'):
1692 ##
1693
1694 res_meta.line = getattr(first_meta, 'container_line', first_meta.line)
1695 res_meta.column = getattr(first_meta, 'container_column', first_meta.column)
1696 res_meta.start_pos = getattr(first_meta, 'container_start_pos', first_meta.start_pos)
1697 res_meta.empty = False
1698
1699 res_meta.container_line = getattr(first_meta, 'container_line', first_meta.line)
1700 res_meta.container_column = getattr(first_meta, 'container_column', first_meta.column)
1701
1702 last_meta = self._pp_get_meta(reversed(children))
1703 if last_meta is not None:
1704 if not hasattr(res_meta, 'end_line'):
1705 res_meta.end_line = getattr(last_meta, 'container_end_line', last_meta.end_line)
1706 res_meta.end_column = getattr(last_meta, 'container_end_column', last_meta.end_column)
1707 res_meta.end_pos = getattr(last_meta, 'container_end_pos', last_meta.end_pos)
1708 res_meta.empty = False
1709
1710 res_meta.container_end_line = getattr(last_meta, 'container_end_line', last_meta.end_line)
1711 res_meta.container_end_column = getattr(last_meta, 'container_end_column', last_meta.end_column)
1712
1713 return res
1714
1715 def _pp_get_meta(self, children):
1716 for c in children:
1717 if self.node_filter is not None and not self.node_filter(c):
1718 continue
1719 if isinstance(c, Tree):
1720 if not c.meta.empty:
1721 return c.meta
1722 elif isinstance(c, Token):
1723 return c
1724
1725def make_propagate_positions(option):
1726 if callable(option):
1727 return partial(PropagatePositions, node_filter=option)
1728 elif option is True:
1729 return PropagatePositions
1730 elif option is False:
1731 return None
1732
1733 raise ConfigurationError('Invalid option for propagate_positions: %r' % option)
1734
1735
1736class ChildFilter:
1737 def __init__(self, to_include, append_none, node_builder):
1738 self.node_builder = node_builder
1739 self.to_include = to_include
1740 self.append_none = append_none
1741
1742 def __call__(self, children):
1743 filtered = []
1744
1745 for i, to_expand, add_none in self.to_include:
1746 if add_none:
1747 filtered += [None] * add_none
1748 if to_expand:
1749 filtered += children[i].children
1750 else:
1751 filtered.append(children[i])
1752
1753 if self.append_none:
1754 filtered += [None] * self.append_none
1755
1756 return self.node_builder(filtered)
1757
1758
1759class ChildFilterLALR(ChildFilter):
1760 #--
1761
1762 def __call__(self, children):
1763 filtered = []
1764 for i, to_expand, add_none in self.to_include:
1765 if add_none:
1766 filtered += [None] * add_none
1767 if to_expand:
1768 if filtered:
1769 filtered += children[i].children
1770 else: ##
1771
1772 filtered = children[i].children
1773 else:
1774 filtered.append(children[i])
1775
1776 if self.append_none:
1777 filtered += [None] * self.append_none
1778
1779 return self.node_builder(filtered)
1780
1781
1782class ChildFilterLALR_NoPlaceholders(ChildFilter):
1783 #--
1784 def __init__(self, to_include, node_builder):
1785 self.node_builder = node_builder
1786 self.to_include = to_include
1787
1788 def __call__(self, children):
1789 filtered = []
1790 for i, to_expand in self.to_include:
1791 if to_expand:
1792 if filtered:
1793 filtered += children[i].children
1794 else: ##
1795
1796 filtered = children[i].children
1797 else:
1798 filtered.append(children[i])
1799 return self.node_builder(filtered)
1800
1801
1802def _should_expand(sym):
1803 return not sym.is_term and sym.name.startswith('_')
1804
1805
1806def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices: List[bool]):
1807 ##
1808
1809 if _empty_indices:
1810 assert _empty_indices.count(False) == len(expansion)
1811 s = ''.join(str(int(b)) for b in _empty_indices)
1812 empty_indices = [len(ones) for ones in s.split('0')]
1813 assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion))
1814 else:
1815 empty_indices = [0] * (len(expansion)+1)
1816
1817 to_include = []
1818 nones_to_add = 0
1819 for i, sym in enumerate(expansion):
1820 nones_to_add += empty_indices[i]
1821 if keep_all_tokens or not (sym.is_term and sym.filter_out):
1822 to_include.append((i, _should_expand(sym), nones_to_add))
1823 nones_to_add = 0
1824
1825 nones_to_add += empty_indices[len(expansion)]
1826
1827 if _empty_indices or len(to_include) < len(expansion) or any(to_expand for i, to_expand,_ in to_include):
1828 if _empty_indices or ambiguous:
1829 return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add)
1830 else:
1831 ##
1832
1833 return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include])
1834
1835
1836class AmbiguousExpander:
1837 #--
1838 def __init__(self, to_expand, tree_class, node_builder):
1839 self.node_builder = node_builder
1840 self.tree_class = tree_class
1841 self.to_expand = to_expand
1842
1843 def __call__(self, children):
1844 def _is_ambig_tree(t):
1845 return hasattr(t, 'data') and t.data == '_ambig'
1846
1847 ##
1848
1849 ##
1850
1851 ##
1852
1853 ##
1854
1855 ambiguous = []
1856 for i, child in enumerate(children):
1857 if _is_ambig_tree(child):
1858 if i in self.to_expand:
1859 ambiguous.append(i)
1860
1861 child.expand_kids_by_data('_ambig')
1862
1863 if not ambiguous:
1864 return self.node_builder(children)
1865
1866 expand = [iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children)]
1867 return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))])
1868
1869
1870def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens):
1871 to_expand = [i for i, sym in enumerate(expansion)
1872 if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))]
1873 if to_expand:
1874 return partial(AmbiguousExpander, to_expand, tree_class)
1875
1876
1877class AmbiguousIntermediateExpander:
1878 #--
1879
1880 def __init__(self, tree_class, node_builder):
1881 self.node_builder = node_builder
1882 self.tree_class = tree_class
1883
1884 def __call__(self, children):
1885 def _is_iambig_tree(child):
1886 return hasattr(child, 'data') and child.data == '_iambig'
1887
1888 def _collapse_iambig(children):
1889 #--
1890
1891 ##
1892
1893 ##
1894
1895 if children and _is_iambig_tree(children[0]):
1896 iambig_node = children[0]
1897 result = []
1898 for grandchild in iambig_node.children:
1899 collapsed = _collapse_iambig(grandchild.children)
1900 if collapsed:
1901 for child in collapsed:
1902 child.children += children[1:]
1903 result += collapsed
1904 else:
1905 new_tree = self.tree_class('_inter', grandchild.children + children[1:])
1906 result.append(new_tree)
1907 return result
1908
1909 collapsed = _collapse_iambig(children)
1910 if collapsed:
1911 processed_nodes = [self.node_builder(c.children) for c in collapsed]
1912 return self.tree_class('_ambig', processed_nodes)
1913
1914 return self.node_builder(children)
1915
1916
1917
1918def inplace_transformer(func):
1919 @wraps(func)
1920 def f(children):
1921 ##
1922
1923 tree = Tree(func.__name__, children)
1924 return func(tree)
1925 return f
1926
1927
1928def apply_visit_wrapper(func, name, wrapper):
1929 if wrapper is _vargs_meta or wrapper is _vargs_meta_inline:
1930 raise NotImplementedError("Meta args not supported for internal transformer")
1931
1932 @wraps(func)
1933 def f(children):
1934 return wrapper(func, name, children, None)
1935 return f
1936
1937
1938class ParseTreeBuilder:
1939 def __init__(self, rules, tree_class, propagate_positions=False, ambiguous=False, maybe_placeholders=False):
1940 self.tree_class = tree_class
1941 self.propagate_positions = propagate_positions
1942 self.ambiguous = ambiguous
1943 self.maybe_placeholders = maybe_placeholders
1944
1945 self.rule_builders = list(self._init_builders(rules))
1946
1947 def _init_builders(self, rules):
1948 propagate_positions = make_propagate_positions(self.propagate_positions)
1949
1950 for rule in rules:
1951 options = rule.options
1952 keep_all_tokens = options.keep_all_tokens
1953 expand_single_child = options.expand1
1954
1955 wrapper_chain = list(filter(None, [
1956 (expand_single_child and not rule.alias) and ExpandSingleChild,
1957 maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None),
1958 propagate_positions,
1959 self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens),
1960 self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class)
1961 ]))
1962
1963 yield rule, wrapper_chain
1964
1965 def create_callback(self, transformer=None):
1966 callbacks = {}
1967
1968 default_handler = getattr(transformer, '__default__', None)
1969 if default_handler:
1970 def default_callback(data, children):
1971 return default_handler(data, children, None)
1972 else:
1973 default_callback = self.tree_class
1974
1975 for rule, wrapper_chain in self.rule_builders:
1976
1977 user_callback_name = rule.alias or rule.options.template_source or rule.origin.name
1978 try:
1979 f = getattr(transformer, user_callback_name)
1980 wrapper = getattr(f, 'visit_wrapper', None)
1981 if wrapper is not None:
1982 f = apply_visit_wrapper(f, user_callback_name, wrapper)
1983 elif isinstance(transformer, Transformer_InPlace):
1984 f = inplace_transformer(f)
1985 except AttributeError:
1986 f = partial(default_callback, user_callback_name)
1987
1988 for w in wrapper_chain:
1989 f = w(f)
1990
1991 if rule in callbacks:
1992 raise GrammarError("Rule '%s' already exists" % (rule,))
1993
1994 callbacks[rule] = f
1995
1996 return callbacks
1997
1998
1999
2000class LALR_Parser(Serialize):
2001 def __init__(self, parser_conf, debug=False):
2002 analysis = LALR_Analyzer(parser_conf, debug=debug)
2003 analysis.compute_lalr()
2004 callbacks = parser_conf.callbacks
2005
2006 self._parse_table = analysis.parse_table
2007 self.parser_conf = parser_conf
2008 self.parser = _Parser(analysis.parse_table, callbacks, debug)
2009
2010 @classmethod
2011 def deserialize(cls, data, memo, callbacks, debug=False):
2012 inst = cls.__new__(cls)
2013 inst._parse_table = IntParseTable.deserialize(data, memo)
2014 inst.parser = _Parser(inst._parse_table, callbacks, debug)
2015 return inst
2016
2017 def serialize(self, memo):
2018 return self._parse_table.serialize(memo)
2019
2020 def parse_interactive(self, lexer, start):
2021 return self.parser.parse(lexer, start, start_interactive=True)
2022
2023 def parse(self, lexer, start, on_error=None):
2024 try:
2025 return self.parser.parse(lexer, start)
2026 except UnexpectedInput as e:
2027 if on_error is None:
2028 raise
2029
2030 while True:
2031 if isinstance(e, UnexpectedCharacters):
2032 s = e.interactive_parser.lexer_thread.state
2033 p = s.line_ctr.char_pos
2034
2035 if not on_error(e):
2036 raise e
2037
2038 if isinstance(e, UnexpectedCharacters):
2039 # If the error handler did not advance past the failing character, skip it to avoid an infinite loop
2041 if p == s.line_ctr.char_pos:
2042 s.line_ctr.feed(s.text[p:p+1])
2043
2044 try:
2045 return e.interactive_parser.resume_parse()
2046 except UnexpectedToken as e2:
2047 if (isinstance(e, UnexpectedToken)
2048 and e.token.type == e2.token.type == '$END'
2049 and e.interactive_parser == e2.interactive_parser):
2050 # Both errors are '$END' with the same parser state: re-raise rather than loop forever
2052 raise e2
2053 e = e2
2054 except UnexpectedCharacters as e2:
2055 e = e2
2056
2057
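# ParseConf is the immutable per-start-symbol configuration (parse table, callbacks, start/end states);
# ParserState below holds the mutable state and value stacks and implements the actual shift/reduce steps.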
2058class ParseConf:
2059 __slots__ = 'parse_table', 'callbacks', 'start', 'start_state', 'end_state', 'states'
2060
2061 def __init__(self, parse_table, callbacks, start):
2062 self.parse_table = parse_table
2063
2064 self.start_state = self.parse_table.start_states[start]
2065 self.end_state = self.parse_table.end_states[start]
2066 self.states = self.parse_table.states
2067
2068 self.callbacks = callbacks
2069 self.start = start
2070
2071
2072class ParserState:
2073 __slots__ = 'parse_conf', 'lexer', 'state_stack', 'value_stack'
2074
2075 def __init__(self, parse_conf, lexer, state_stack=None, value_stack=None):
2076 self.parse_conf = parse_conf
2077 self.lexer = lexer
2078 self.state_stack = state_stack or [self.parse_conf.start_state]
2079 self.value_stack = value_stack or []
2080
2081 @property
2082 def position(self):
2083 return self.state_stack[-1]
2084
2085 # Equality is defined on stack depth and top state, so that match_examples() can compare parser states
2087 def __eq__(self, other):
2088 if not isinstance(other, ParserState):
2089 return NotImplemented
2090 return len(self.state_stack) == len(other.state_stack) and self.position == other.position
2091
2092 def __copy__(self):
2093 return type(self)(
2094 self.parse_conf,
2095 self.lexer, # the lexer is intentionally shared, not copied
2097 copy(self.state_stack),
2098 deepcopy(self.value_stack),
2099 )
2100
2101 def copy(self):
2102 return copy(self)
2103
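    # feed_token() performs the core LALR step for one token: look up the action for
    # (current state, token type); on Shift, push onto the state and value stacks and return;
    # on Reduce, pop the rule's right-hand side, call the rule callback, push the result,
    # and repeat until the token can be shifted (or the end state is reached when is_end is True).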
2104 def feed_token(self, token, is_end=False):
2105 state_stack = self.state_stack
2106 value_stack = self.value_stack
2107 states = self.parse_conf.states
2108 end_state = self.parse_conf.end_state
2109 callbacks = self.parse_conf.callbacks
2110
2111 while True:
2112 state = state_stack[-1]
2113 try:
2114 action, arg = states[state][token.type]
2115 except KeyError:
2116 expected = {s for s in states[state].keys() if s.isupper()}
2117 raise UnexpectedToken(token, expected, state=self, interactive_parser=None)
2118
2119 assert arg != end_state
2120
2121 if action is Shift:
2122 # shift the token and return
2124 assert not is_end
2125 state_stack.append(arg)
2126 value_stack.append(token if token.type not in callbacks else callbacks[token.type](token))
2127 return
2128 else:
2129 # reduce, then loop back to retry the same token
2131 rule = arg
2132 size = len(rule.expansion)
2133 if size:
2134 s = value_stack[-size:]
2135 del state_stack[-size:]
2136 del value_stack[-size:]
2137 else:
2138 s = []
2139
2140 value = callbacks[rule](s)
2141
2142 _action, new_state = states[state_stack[-1]][rule.origin.name]
2143 assert _action is Shift
2144 state_stack.append(new_state)
2145 value_stack.append(value)
2146
2147 if is_end and state_stack[-1] == end_state:
2148 return value_stack[-1]
2149
2150class _Parser:
2151 def __init__(self, parse_table, callbacks, debug=False):
2152 self.parse_table = parse_table
2153 self.callbacks = callbacks
2154 self.debug = debug
2155
2156 def parse(self, lexer, start, value_stack=None, state_stack=None, start_interactive=False):
2157 parse_conf = ParseConf(self.parse_table, self.callbacks, start)
2158 parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
2159 if start_interactive:
2160 return InteractiveParser(self, parser_state, parser_state.lexer)
2161 return self.parse_from_state(parser_state)
2162
2163
2164 def parse_from_state(self, state):
2165 # Main LALR parser loop
2167 try:
2168 token = None
2169 for token in state.lexer.lex(state):
2170 state.feed_token(token)
2171
2172 end_token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1)
2173 return state.feed_token(end_token, True)
2174 except UnexpectedInput as e:
2175 try:
2176 e.interactive_parser = InteractiveParser(self, state, state.lexer)
2177 except NameError:
2178 pass
2179 raise e
2180 except Exception as e:
2181 if self.debug:
2182 print("")
2183 print("STATE STACK DUMP")
2184 print("----------------")
2185 for i, s in enumerate(state.state_stack):
2186 print('%d)' % i, s)
2187 print("")
2188
2189 raise
2190
2191
2192class Action:
2193 def __init__(self, name):
2194 self.name = name
2195 def __str__(self):
2196 return self.name
2197 def __repr__(self):
2198 return str(self)
2199
2200Shift = Action('Shift')
2201Reduce = Action('Reduce')
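# Singleton markers for parse-table actions; the parsing loop compares them by identity (`action is Shift`).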
2202
2203
2204class ParseTable:
2205 def __init__(self, states, start_states, end_states):
2206 self.states = states
2207 self.start_states = start_states
2208 self.end_states = end_states
2209
2210 def serialize(self, memo):
2211 tokens = Enumerator()
2212 rules = Enumerator()
2213
2214 states = {
2215 state: {tokens.get(token): ((1, arg.serialize(memo)) if action is Reduce else (0, arg))
2216 for token, (action, arg) in actions.items()}
2217 for state, actions in self.states.items()
2218 }
2219
2220 return {
2221 'tokens': tokens.reversed(),
2222 'states': states,
2223 'start_states': self.start_states,
2224 'end_states': self.end_states,
2225 }
2226
2227 @classmethod
2228 def deserialize(cls, data, memo):
2229 tokens = data['tokens']
2230 states = {
2231 state: {tokens[token]: ((Reduce, Rule.deserialize(arg, memo)) if action==1 else (Shift, arg))
2232 for token, (action, arg) in actions.items()}
2233 for state, actions in data['states'].items()
2234 }
2235 return cls(states, data['start_states'], data['end_states'])
2236
2237
2238class IntParseTable(ParseTable):
2239
2240 @classmethod
2241 def from_ParseTable(cls, parse_table):
2242 enum = list(parse_table.states)
2243 state_to_idx = {s:i for i,s in enumerate(enum)}
2244 int_states = {}
2245
2246 for s, la in parse_table.states.items():
2247 la = {k:(v[0], state_to_idx[v[1]]) if v[0] is Shift else v
2248 for k,v in la.items()}
2249 int_states[ state_to_idx[s] ] = la
2250
2251
2252 start_states = {start:state_to_idx[s] for start, s in parse_table.start_states.items()}
2253 end_states = {start:state_to_idx[s] for start, s in parse_table.end_states.items()}
2254 return cls(int_states, start_states, end_states)
2255
2256
2257
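# Adapts custom lexer classes that still use the old interface (lex(text)) to the current one,
# which receives the lexer state and the parser state.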
2258def _wrap_lexer(lexer_class):
2259 future_interface = getattr(lexer_class, '__future_interface__', False)
2260 if future_interface:
2261 return lexer_class
2262 else:
2263 class CustomLexerWrapper(Lexer):
2264 def __init__(self, lexer_conf):
2265 self.lexer = lexer_class(lexer_conf)
2266 def lex(self, lexer_state, parser_state):
2267 return self.lexer.lex(lexer_state.text)
2268 return CustomLexerWrapper
2269
2270
2271def _deserialize_parsing_frontend(data, memo, lexer_conf, callbacks, options):
2272 parser_conf = ParserConf.deserialize(data['parser_conf'], memo)
2273 cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser
2274 parser = cls.deserialize(data['parser'], memo, callbacks, options.debug)
2275 parser_conf.callbacks = callbacks
2276 return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser)
2277
2278
2279_parser_creators: 'Dict[str, Callable[[LexerConf, Any, Any], Any]]' = {}
2280
2281
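# ParsingFrontend wires a lexer and a parser together according to the configuration: it either reuses
# a deserialized parser or builds one via _parser_creators, then picks a basic/contextual/custom lexer
# (or skips lexing entirely for Earley's dynamic lexers) and applies any postlexer.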
2282class ParsingFrontend(Serialize):
2283 __serialize_fields__ = 'lexer_conf', 'parser_conf', 'parser'
2284
2285 def __init__(self, lexer_conf, parser_conf, options, parser=None):
2286 self.parser_conf = parser_conf
2287 self.lexer_conf = lexer_conf
2288 self.options = options
2289
2290 # Set up the parser
2292 if parser: # the parser was already built (deserialized from cache or standalone data)
2294 self.parser = parser
2295 else:
2296 create_parser = _parser_creators.get(parser_conf.parser_type)
2297 assert create_parser is not None, "{} is not supported in standalone mode".format(
2298 parser_conf.parser_type
2299 )
2300 self.parser = create_parser(lexer_conf, parser_conf, options)
2301
2302 # Set up the lexer
2304 lexer_type = lexer_conf.lexer_type
2305 self.skip_lexer = False
2306 if lexer_type in ('dynamic', 'dynamic_complete'):
2307 assert lexer_conf.postlex is None
2308 self.skip_lexer = True
2309 return
2310
2311 try:
2312 create_lexer = {
2313 'basic': create_basic_lexer,
2314 'contextual': create_contextual_lexer,
2315 }[lexer_type]
2316 except KeyError:
2317 assert issubclass(lexer_type, Lexer), lexer_type
2318 self.lexer = _wrap_lexer(lexer_type)(lexer_conf)
2319 else:
2320 self.lexer = create_lexer(lexer_conf, self.parser, lexer_conf.postlex, options)
2321
2322 if lexer_conf.postlex:
2323 self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex)
2324
2325 def _verify_start(self, start=None):
2326 if start is None:
2327 start_decls = self.parser_conf.start
2328 if len(start_decls) > 1:
2329 raise ConfigurationError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start_decls)
2330 start, = start_decls
2331 elif start not in self.parser_conf.start:
2332 raise ConfigurationError("Unknown start rule %s. Must be one of %r" % (start, self.parser_conf.start))
2333 return start
2334
2335 def _make_lexer_thread(self, text):
2336 cls = (self.options and self.options._plugins.get('LexerThread')) or LexerThread
2337 return text if self.skip_lexer else cls.from_text(self.lexer, text)
2338
2339 def parse(self, text, start=None, on_error=None):
2340 chosen_start = self._verify_start(start)
2341 kw = {} if on_error is None else {'on_error': on_error}
2342 stream = self._make_lexer_thread(text)
2343 return self.parser.parse(stream, chosen_start, **kw)
2344
2345 def parse_interactive(self, text=None, start=None):
2346 chosen_start = self._verify_start(start)
2347 if self.parser_conf.parser_type != 'lalr':
2348 raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ")
2349 stream = self._make_lexer_thread(text)
2350 return self.parser.parse_interactive(stream, chosen_start)
2351
2352
2353def _validate_frontend_args(parser, lexer) -> None:
2354 assert_config(parser, ('lalr', 'earley', 'cyk'))
2355 if not isinstance(lexer, type): # a lexer name string, not a custom lexer class
2357 expected = {
2358 'lalr': ('basic', 'contextual'),
2359 'earley': ('basic', 'dynamic', 'dynamic_complete'),
2360 'cyk': ('basic', ),
2361 }[parser]
2362 assert_config(lexer, expected, 'Parser %r does not support lexer %%r, expected one of %%s' % parser)
2363
2364
2365def _get_lexer_callbacks(transformer, terminals):
2366 result = {}
2367 for terminal in terminals:
2368 callback = getattr(transformer, terminal.name, None)
2369 if callback is not None:
2370 result[terminal.name] = callback
2371 return result
2372
2373class PostLexConnector:
2374 def __init__(self, lexer, postlexer):
2375 self.lexer = lexer
2376 self.postlexer = postlexer
2377
2378 def lex(self, lexer_state, parser_state):
2379 i = self.lexer.lex(lexer_state, parser_state)
2380 return self.postlexer.process(i)
2381
2382
2383
2384def create_basic_lexer(lexer_conf, parser, postlex, options):
2385 cls = (options and options._plugins.get('BasicLexer')) or BasicLexer
2386 return cls(lexer_conf)
2387
2388def create_contextual_lexer(lexer_conf, parser, postlex, options):
2389 cls = (options and options._plugins.get('ContextualLexer')) or ContextualLexer
2390 states = {idx:list(t.keys()) for idx, t in parser._parse_table.states.items()}
2391 always_accept = postlex.always_accept if postlex else ()
2392 return cls(lexer_conf, states, always_accept=always_accept)
2393
2394def create_lalr_parser(lexer_conf, parser_conf, options=None):
2395 debug = options.debug if options else False
2396 cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser
2397 return cls(parser_conf, debug=debug)
2398
2399_parser_creators['lalr'] = create_lalr_parser
2400
2401
2402
2403
2404class PostLex(ABC):
2405 @abstractmethod
2406 def process(self, stream: Iterator[Token]) -> Iterator[Token]:
2407 return stream
2408
2409 always_accept: Iterable[str] = ()
2410
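# A minimal PostLex sketch (illustrative only, not produced by the generator): it passes the token
# stream through unchanged. Real post-lexers, such as the Indenter defined further below, insert or
# drop tokens between the lexer and the parser. The class name is hypothetical.
class _PassThroughPostLex(PostLex):
    def process(self, stream):
        # simply forward every token from the lexer to the parser
        yield from stream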
2411class LarkOptions(Serialize):
2412 #-- Specifies the options for Lark; documented in OPTIONS_DOC below.
2413
2414 start: List[str]
2415 debug: bool
2416 transformer: 'Optional[Transformer]'
2417 propagate_positions: Union[bool, str]
2418 maybe_placeholders: bool
2419 cache: Union[bool, str]
2420 regex: bool
2421 g_regex_flags: int
2422 keep_all_tokens: bool
2423 tree_class: Any
2424 parser: _ParserArgType
2425 lexer: _LexerArgType
2426 ambiguity: 'Literal["auto", "resolve", "explicit", "forest"]'
2427 postlex: Optional[PostLex]
2428 priority: 'Optional[Literal["auto", "normal", "invert"]]'
2429 lexer_callbacks: Dict[str, Callable[[Token], Token]]
2430 use_bytes: bool
2431 edit_terminals: Optional[Callable[[TerminalDef], TerminalDef]]
2432 import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]'
2433 source_path: Optional[str]
2434
2435 OPTIONS_DOC = """
2436 **=== General Options ===**
2437
2438 start
2439 The start symbol. Either a string, or a list of strings for multiple possible starts (Default: "start")
2440 debug
2441 Display debug information and extra warnings. Use only when debugging (Default: ``False``)
2442 When used with Earley, it generates a forest graph as "sppf.png", if 'dot' is installed.
2443 transformer
2444 Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
2445 propagate_positions
2446 Propagates (line, column, end_line, end_column) attributes into all tree branches.
2447 Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating.
2448 maybe_placeholders
2449 When ``True``, the ``[]`` operator returns ``None`` when not matched.
2450 When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all.
2451 (default= ``True``)
2452 cache
2453 Cache the results of the Lark grammar analysis, for 2x to 3x faster loading. LALR only for now.
2454
2455 - When ``False``, does nothing (default)
2456 - When ``True``, caches to a temporary file in the local directory
2457 - When given a string, caches to the path pointed by the string
2458 regex
2459 When True, uses the ``regex`` module instead of the stdlib ``re``.
2460 g_regex_flags
2461 Flags that are applied to all terminals (both regex and strings)
2462 keep_all_tokens
2463 Prevent the tree builder from automagically removing "punctuation" tokens (Default: ``False``)
2464 tree_class
2465 Lark will produce trees comprised of instances of this class instead of the default ``lark.Tree``.
2466
2467 **=== Algorithm Options ===**
2468
2469 parser
2470 Decides which parser engine to use. Accepts "earley" or "lalr". (Default: "earley").
2471 (there is also a "cyk" option for legacy)
2472 lexer
2473 Decides whether or not to use a lexer stage
2474
2475 - "auto" (default): Choose for me based on the parser
2476 - "basic": Use a basic lexer
2477 - "contextual": Stronger lexer (only works with parser="lalr")
2478 - "dynamic": Flexible and powerful (only with parser="earley")
2479 - "dynamic_complete": Same as dynamic, but tries *every* variation of tokenizing possible.
2480 ambiguity
2481 Decides how to handle ambiguity in the parse. Only relevant if parser="earley"
2482
2483 - "resolve": The parser will automatically choose the simplest derivation
2484 (it chooses consistently: greedy for tokens, non-greedy for rules)
2485 - "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).
2486 - "forest": The parser will return the root of the shared packed parse forest.
2487
2488 **=== Misc. / Domain Specific Options ===**
2489
2490 postlex
2491 Lexer post-processing (Default: ``None``). Only works with the basic and contextual lexers.
2492 priority
2493 How priorities should be evaluated - "auto", ``None``, "normal", "invert" (Default: "auto")
2494 lexer_callbacks
2495 Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution.
2496 use_bytes
2497 Accept an input of type ``bytes`` instead of ``str``.
2498 edit_terminals
2499 A callback for editing the terminals before parse.
2500 import_paths
2501 A list of either paths or loader functions that specify where grammars are imported from
2502 source_path
2503 Override the source from which the grammar was loaded. Useful for relative imports and unconventional grammar loading
2504 **=== End of Options ===**
2505 """
2506 if __doc__:
2507 __doc__ += OPTIONS_DOC
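    # Note for this standalone module: when the parser is restored via Lark_StandAlone(**kwargs)
    # (see the end of this file), only the options listed in _LOAD_ALLOWED_OPTIONS below may still be
    # overridden, e.g. Lark_StandAlone(debug=True, propagate_positions=True); the remaining options
    # were fixed when the parser was generated. (Illustrative values only.)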
2508
2509
2510 # Adding a new option needs to be done in several places:
2512 # - in the `_defaults` dict below, the primary list of options `Lark.__init__` accepts
2514 # - in the OPTIONS_DOC docstring above, which also feeds the `Lark` docstring
2516 # - as a type-annotated attribute of LarkOptions above
2518 # - possibly in `_LOAD_ALLOWED_OPTIONS` below this class, if the option does not affect how the grammar is loaded
2520 # - possibly in the command-line tools, if it can usefully be passed as a flag
2521
2522 _defaults: Dict[str, Any] = {
2523 'debug': False,
2524 'keep_all_tokens': False,
2525 'tree_class': None,
2526 'cache': False,
2527 'postlex': None,
2528 'parser': 'earley',
2529 'lexer': 'auto',
2530 'transformer': None,
2531 'start': 'start',
2532 'priority': 'auto',
2533 'ambiguity': 'auto',
2534 'regex': False,
2535 'propagate_positions': False,
2536 'lexer_callbacks': {},
2537 'maybe_placeholders': True,
2538 'edit_terminals': None,
2539 'g_regex_flags': 0,
2540 'use_bytes': False,
2541 'import_paths': [],
2542 'source_path': None,
2543 '_plugins': {},
2544 }
2545
2546 def __init__(self, options_dict):
2547 o = dict(options_dict)
2548
2549 options = {}
2550 for name, default in self._defaults.items():
2551 if name in o:
2552 value = o.pop(name)
2553 if isinstance(default, bool) and name not in ('cache', 'use_bytes', 'propagate_positions'):
2554 value = bool(value)
2555 else:
2556 value = default
2557
2558 options[name] = value
2559
2560 if isinstance(options['start'], str):
2561 options['start'] = [options['start']]
2562
2563 self.__dict__['options'] = options
2564
2565
2566 assert_config(self.parser, ('earley', 'lalr', 'cyk', None))
2567
2568 if self.parser == 'earley' and self.transformer:
2569 raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm. '
2570 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)')
2571
2572 if o:
2573 raise ConfigurationError("Unknown options: %s" % o.keys())
2574
2575 def __getattr__(self, name):
2576 try:
2577 return self.__dict__['options'][name]
2578 except KeyError as e:
2579 raise AttributeError(e)
2580
2581 def __setattr__(self, name, value):
2582 assert_config(name, self.options.keys(), "%r isn't a valid option. Expected one of: %s")
2583 self.options[name] = value
2584
2585 def serialize(self, memo):
2586 return self.options
2587
2588 @classmethod
2589 def deserialize(cls, data, memo):
2590 return cls(data)
2591
2592
2593# Options that can still be passed to the Lark parser when it is loaded from cache or from a standalone module.
2595# These are runtime options; they do not affect how the grammar itself is loaded.
2596
2597_LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'lexer_callbacks', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class', '_plugins'}
2598
2599_VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None)
2600_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest')
2601
2602
2603_T = TypeVar('_T', bound="Lark")
2604
2605class Lark(Serialize):
2606 #-- Main interface for the library: builds (or restores) the lexer and parser from a grammar or from serialized data, and exposes parse(), lex() and related helpers.
2607
2608 source_path: str
2609 source_grammar: str
2610 grammar: 'Grammar'
2611 options: LarkOptions
2612 lexer: Lexer
2613 terminals: List[TerminalDef]
2614
2615 def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
2616 self.options = LarkOptions(options)
2617
2618 # Set regex or re module
2620 use_regex = self.options.regex
2621 if use_regex:
2622 if regex:
2623 re_module = regex
2624 else:
2625 raise ImportError('`regex` module must be installed if calling `Lark(regex=True)`.')
2626 else:
2627 re_module = re
2628
2629 # Some file-like objects expose a 'name' attribute, which makes a useful source path
2631 if self.options.source_path is None:
2632 try:
2633 self.source_path = grammar.name
2634 except AttributeError:
2635 self.source_path = '<string>'
2636 else:
2637 self.source_path = self.options.source_path
2638
2639 # Drain file-like objects to get their contents
2641 try:
2642 read = grammar.read
2643 except AttributeError:
2644 pass
2645 else:
2646 grammar = read()
2647
2648 cache_fn = None
2649 cache_md5 = None
2650 if isinstance(grammar, str):
2651 self.source_grammar = grammar
2652 if self.options.use_bytes:
2653 if not isascii(grammar):
2654 raise ConfigurationError("Grammar must be ascii only, when use_bytes=True")
2655
2656 if self.options.cache:
2657 if self.options.parser != 'lalr':
2658 raise ConfigurationError("cache only works with parser='lalr' for now")
2659
2660 unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals', '_plugins')
2661 options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable)
2662 from . import __version__
2663 s = grammar + options_str + __version__ + str(sys.version_info[:2])
2664 cache_md5 = hashlib.md5(s.encode('utf8')).hexdigest()
2665
2666 if isinstance(self.options.cache, str):
2667 cache_fn = self.options.cache
2668 else:
2669 if self.options.cache is not True:
2670 raise ConfigurationError("cache argument must be bool or str")
2671
2672 cache_fn = tempfile.gettempdir() + '/.lark_cache_%s_%s_%s.tmp' % (cache_md5, *sys.version_info[:2])
2673
2674 if FS.exists(cache_fn):
2675 logger.debug('Loading grammar from cache: %s', cache_fn)
2676 # Remove options that aren't relevant for loading from cache
2678 for name in (set(options) - _LOAD_ALLOWED_OPTIONS):
2679 del options[name]
2680 with FS.open(cache_fn, 'rb') as f:
2681 old_options = self.options
2682 try:
2683 file_md5 = f.readline().rstrip(b'\n')
2684 cached_used_files = pickle.load(f)
2685 if file_md5 == cache_md5.encode('utf8') and verify_used_files(cached_used_files):
2686 cached_parser_data = pickle.load(f)
2687 self._load(cached_parser_data, **options)
2688 return
2689 except Exception: # a corrupt or incompatible cache should never be fatal: log it and rebuild
2691 logger.exception("Failed to load Lark from cache: %r. We will try to carry on." % cache_fn)
2692
2693 # Loading from cache failed part-way, and _load may have partially mutated this instance,
2695 # so restore the original options and fall through to a full rebuild below.
2697 self.options = old_options
2698
2699
2700 # Parse the grammar text (and any imported grammars) into a Grammar object
2702 self.grammar, used_files = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens)
2703 else:
2704 assert isinstance(grammar, Grammar)
2705 self.grammar = grammar
2706
2707
2708 if self.options.lexer == 'auto':
2709 if self.options.parser == 'lalr':
2710 self.options.lexer = 'contextual'
2711 elif self.options.parser == 'earley':
2712 if self.options.postlex is not None:
2713 logger.info("postlex can't be used with the dynamic lexer, so we use 'basic' instead. "
2714 "Consider using lalr with contextual instead of earley")
2715 self.options.lexer = 'basic'
2716 else:
2717 self.options.lexer = 'dynamic'
2718 elif self.options.parser == 'cyk':
2719 self.options.lexer = 'basic'
2720 else:
2721 assert False, self.options.parser
2722 lexer = self.options.lexer
2723 if isinstance(lexer, type):
2724 assert issubclass(lexer, Lexer) # a custom lexer class must implement the Lexer interface
2726 else:
2727 assert_config(lexer, ('basic', 'contextual', 'dynamic', 'dynamic_complete'))
2728 if self.options.postlex is not None and 'dynamic' in lexer:
2729 raise ConfigurationError("Can't use postlex with a dynamic lexer. Use basic or contextual instead")
2730
2731 if self.options.ambiguity == 'auto':
2732 if self.options.parser == 'earley':
2733 self.options.ambiguity = 'resolve'
2734 else:
2735 assert_config(self.options.parser, ('earley', 'cyk'), "%r doesn't support disambiguation. Use one of these parsers instead: %s")
2736
2737 if self.options.priority == 'auto':
2738 self.options.priority = 'normal'
2739
2740 if self.options.priority not in _VALID_PRIORITY_OPTIONS:
2741 raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
2742 if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
2743 raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))
2744
2745 if self.options.parser is None:
2746 terminals_to_keep = '*'
2747 elif self.options.postlex is not None:
2748 terminals_to_keep = set(self.options.postlex.always_accept)
2749 else:
2750 terminals_to_keep = set()
2751
2752 # Compile the EBNF grammar into BNF
2754 self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start, terminals_to_keep)
2755
2756 if self.options.edit_terminals:
2757 for t in self.terminals:
2758 self.options.edit_terminals(t)
2759
2760 self._terminals_dict = {t.name: t for t in self.terminals}
2761
2762 # If requested, invert all rule and terminal priorities
2764 if self.options.priority == 'invert':
2765 for rule in self.rules:
2766 if rule.options.priority is not None:
2767 rule.options.priority = -rule.options.priority
2768 for term in self.terminals:
2769 term.priority = -term.priority
2770 # Else, if priorities are disabled (priority=None), strip them from the rules
2772 # and reset terminal priorities to 0, so that neither the parser nor the lexer
2774 # takes them into account.
2775
2776 elif self.options.priority is None:
2777 for rule in self.rules:
2778 if rule.options.priority is not None:
2779 rule.options.priority = None
2780 for term in self.terminals:
2781 term.priority = 0
2782
2783 # Build the lexer configuration shared by all lexer types
2785 self.lexer_conf = LexerConf(
2786 self.terminals, re_module, self.ignore_tokens, self.options.postlex,
2787 self.options.lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes
2788 )
2789
2790 if self.options.parser:
2791 self.parser = self._build_parser()
2792 elif lexer:
2793 self.lexer = self._build_lexer()
2794
2795 if cache_fn:
2796 logger.debug('Saving grammar to cache: %s', cache_fn)
2797 with FS.open(cache_fn, 'wb') as f:
2798 assert cache_md5 is not None
2799 f.write(cache_md5.encode('utf8') + b'\n')
2800 pickle.dump(used_files, f)
2801 self.save(f, _LOAD_ALLOWED_OPTIONS)
2802
2803 if __doc__:
2804 __doc__ += "\n\n" + LarkOptions.OPTIONS_DOC
2805
2806 __serialize_fields__ = 'parser', 'rules', 'options'
2807
2808 def _build_lexer(self, dont_ignore=False):
2809 lexer_conf = self.lexer_conf
2810 if dont_ignore:
2811 from copy import copy
2812 lexer_conf = copy(lexer_conf)
2813 lexer_conf.ignore = ()
2814 return BasicLexer(lexer_conf)
2815
2816 def _prepare_callbacks(self):
2817 self._callbacks = {}
2818 # These callbacks are only needed when building a tree (i.e. not in 'forest' mode)
2820 if self.options.ambiguity != 'forest':
2821 self._parse_tree_builder = ParseTreeBuilder(
2822 self.rules,
2823 self.options.tree_class or Tree,
2824 self.options.propagate_positions,
2825 self.options.parser != 'lalr' and self.options.ambiguity == 'explicit',
2826 self.options.maybe_placeholders
2827 )
2828 self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
2829 self._callbacks.update(_get_lexer_callbacks(self.options.transformer, self.terminals))
2830
2831 def _build_parser(self):
2832 self._prepare_callbacks()
2833 _validate_frontend_args(self.options.parser, self.options.lexer)
2834 parser_conf = ParserConf(self.rules, self._callbacks, self.options.start)
2835 return _construct_parsing_frontend(
2836 self.options.parser,
2837 self.options.lexer,
2838 self.lexer_conf,
2839 parser_conf,
2840 options=self.options
2841 )
2842
2843 def save(self, f, exclude_options: Collection[str] = ()):
2844 #-- Saves the instance into the given file object. Useful for caching and multiprocessing.
2845 data, m = self.memo_serialize([TerminalDef, Rule])
2846 if exclude_options:
2847 data["options"] = {n: v for n, v in data["options"].items() if n not in exclude_options}
2848 pickle.dump({'data': data, 'memo': m}, f, protocol=pickle.HIGHEST_PROTOCOL)
2849
2850 @classmethod
2851 def load(cls, f):
2852 #-- Loads an instance from the given file object. Useful for caching and multiprocessing.
2853 inst = cls.__new__(cls)
2854 return inst._load(f)
2855
2856 def _deserialize_lexer_conf(self, data, memo, options):
2857 lexer_conf = LexerConf.deserialize(data['lexer_conf'], memo)
2858 lexer_conf.callbacks = options.lexer_callbacks or {}
2859 lexer_conf.re_module = regex if options.regex else re
2860 lexer_conf.use_bytes = options.use_bytes
2861 lexer_conf.g_regex_flags = options.g_regex_flags
2862 lexer_conf.skip_validation = True
2863 lexer_conf.postlex = options.postlex
2864 return lexer_conf
2865
2866 def _load(self, f, **kwargs):
2867 if isinstance(f, dict):
2868 d = f
2869 else:
2870 d = pickle.load(f)
2871 memo_json = d['memo']
2872 data = d['data']
2873
2874 assert memo_json
2875 memo = SerializeMemoizer.deserialize(memo_json, {'Rule': Rule, 'TerminalDef': TerminalDef}, {})
2876 options = dict(data['options'])
2877 if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults):
2878 raise ConfigurationError("Some options are not allowed when loading a Parser: {}"
2879 .format(set(kwargs) - _LOAD_ALLOWED_OPTIONS))
2880 options.update(kwargs)
2881 self.options = LarkOptions.deserialize(options, memo)
2882 self.rules = [Rule.deserialize(r, memo) for r in data['rules']]
2883 self.source_path = '<deserialized>'
2884 _validate_frontend_args(self.options.parser, self.options.lexer)
2885 self.lexer_conf = self._deserialize_lexer_conf(data['parser'], memo, self.options)
2886 self.terminals = self.lexer_conf.terminals
2887 self._prepare_callbacks()
2888 self._terminals_dict = {t.name: t for t in self.terminals}
2889 self.parser = _deserialize_parsing_frontend(
2890 data['parser'],
2891 memo,
2892 self.lexer_conf,
2893 self._callbacks,
2894 self.options, # several option attributes (e.g. debug, _plugins) are used while rebuilding the frontend
2896 )
2897 return self
2898
2899 @classmethod
2900 def _load_from_dict(cls, data, memo, **kwargs):
2901 inst = cls.__new__(cls)
2902 return inst._load({'data': data, 'memo': memo}, **kwargs)
2903
2904 @classmethod
2905 def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str]=None, **options) -> _T:
2906 #-- Create an instance of Lark with the grammar given by its filename; if rel_to is provided, the path is taken relative to it.
2907 if rel_to:
2908 basepath = os.path.dirname(rel_to)
2909 grammar_filename = os.path.join(basepath, grammar_filename)
2910 with open(grammar_filename, encoding='utf8') as f:
2911 return cls(f, **options)
2912
2913 @classmethod
2914 def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: 'Sequence[str]'=[""], **options) -> _T:
2915 #-- Create an instance of Lark with the grammar loaded from within the given package, searching through search_paths relative to the package root.
2916 package_loader = FromPackageLoader(package, search_paths)
2917 full_path, text = package_loader(None, grammar_path)
2918 options.setdefault('source_path', full_path)
2919 options.setdefault('import_paths', [])
2920 options['import_paths'].append(package_loader)
2921 return cls(text, **options)
2922
2923 def __repr__(self):
2924 return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer)
2925
2926
2927 def lex(self, text: str, dont_ignore: bool=False) -> Iterator[Token]:
2928 #-- Only lex (and postlex) the text, without parsing it; when dont_ignore is True, ignored terminals are returned as well.
2929 if not hasattr(self, 'lexer') or dont_ignore:
2930 lexer = self._build_lexer(dont_ignore)
2931 else:
2932 lexer = self.lexer
2933 lexer_thread = LexerThread.from_text(lexer, text)
2934 stream = lexer_thread.lex(None)
2935 if self.options.postlex:
2936 return self.options.postlex.process(stream)
2937 return stream
2938
2939 def get_terminal(self, name: str) -> TerminalDef:
2940 #-- Return the terminal definition with the given name.
2941 return self._terminals_dict[name]
2942
2943 def parse_interactive(self, text: Optional[str]=None, start: Optional[str]=None) -> 'InteractiveParser':
2944 #-- Start an interactive parsing session; returns an InteractiveParser (LALR only).
2945 return self.parser.parse_interactive(text, start=start)
2946
2947 def parse(self, text: str, start: Optional[str]=None, on_error: 'Optional[Callable[[UnexpectedInput], bool]]'=None) -> 'ParseTree':
2948 #-- Parse the given text and return a parse tree (or the transformer's result); on_error enables error recovery with the LALR parser.
2949 return self.parser.parse(text, start=start, on_error=on_error)
2950
2951
2952
2953
2954class DedentError(LarkError):
2955 pass
2956
2957class Indenter(PostLex, ABC):
2958 paren_level: int
2959 indent_level: List[int]
2960
2961 def __init__(self) -> None:
2962 self.paren_level = 0
2963 self.indent_level = [0]
2964 assert self.tab_len > 0
2965
2966 def handle_NL(self, token: Token) -> Iterator[Token]:
2967 if self.paren_level > 0:
2968 return
2969
2970 yield token
2971
2972 indent_str = token.rsplit('\n', 1)[1] # spaces and tabs after the last newline
2974 indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len
2975
2976 if indent > self.indent_level[-1]:
2977 self.indent_level.append(indent)
2978 yield Token.new_borrow_pos(self.INDENT_type, indent_str, token)
2979 else:
2980 while indent < self.indent_level[-1]:
2981 self.indent_level.pop()
2982 yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token)
2983
2984 if indent != self.indent_level[-1]:
2985 raise DedentError('Unexpected dedent to column %s. Expected dedent to %s' % (indent, self.indent_level[-1]))
2986
2987 def _process(self, stream):
2988 for token in stream:
2989 if token.type == self.NL_type:
2990 yield from self.handle_NL(token)
2991 else:
2992 yield token
2993
2994 if token.type in self.OPEN_PAREN_types:
2995 self.paren_level += 1
2996 elif token.type in self.CLOSE_PAREN_types:
2997 self.paren_level -= 1
2998 assert self.paren_level >= 0
2999
3000 while len(self.indent_level) > 1:
3001 self.indent_level.pop()
3002 yield Token(self.DEDENT_type, '')
3003
3004 assert self.indent_level == [0], self.indent_level
3005
3006 def process(self, stream):
3007 self.paren_level = 0
3008 self.indent_level = [0]
3009 return self._process(stream)
3010
3011 # The newline token must always reach the postlexer, even when the contextual lexer would not otherwise accept it
3013 @property
3014 def always_accept(self):
3015 return (self.NL_type,)
3016
3017 @property
3018 @abstractmethod
3019 def NL_type(self) -> str:
3020 raise NotImplementedError()
3021
3022 @property
3023 @abstractmethod
3024 def OPEN_PAREN_types(self) -> List[str]:
3025 raise NotImplementedError()
3026
3027 @property
3028 @abstractmethod
3029 def CLOSE_PAREN_types(self) -> List[str]:
3030 raise NotImplementedError()
3031
3032 @property
3033 @abstractmethod
3034 def INDENT_type(self) -> str:
3035 raise NotImplementedError()
3036
3037 @property
3038 @abstractmethod
3039 def DEDENT_type(self) -> str:
3040 raise NotImplementedError()
3041
3042 @property
3043 @abstractmethod
3044 def tab_len(self) -> int:
3045 raise NotImplementedError()
3046
3047
3048class PythonIndenter(Indenter):
3049 NL_type = '_NEWLINE'
3050 OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
3051 CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']
3052 INDENT_type = '_INDENT'
3053 DEDENT_type = '_DEDENT'
3054 tab_len = 8
3055
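# PythonIndenter is the stock postlexer for Python-style indentation: it is meant to be passed as
# postlex=PythonIndenter() for grammars that declare the _NEWLINE, _INDENT and _DEDENT terminals.
# Judging by the serialized DATA below, the grammar embedded in this particular module does not use
# them, so the class is present here as part of the standard standalone runtime.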
3056
3057import base64
3058import pickle
3059import zlib
3060
3061DATA = (
3062{'parser': {'lexer_conf': {'terminals': [{'@': 0}, {'@': 1}, {'@': 2}, {'@': 3}, {'@': 4}, {'@': 5}, {'@': 6}, {'@': 7}, {'@': 8}, {'@': 9}, {'@': 10}, {'@': 11}], 'ignore': ['WS'], 'g_regex_flags': 0, 'use_bytes': False, 'lexer_type': 'contextual', '__type__': 'LexerConf'}, 'parser_conf': {'rules': [{'@': 12}, {'@': 13}, {'@': 14}, {'@': 15}, {'@': 16}, {'@': 17}, {'@': 18}, {'@': 19}, {'@': 20}, {'@': 21}, {'@': 22}, {'@': 23}, {'@': 24}, {'@': 25}, {'@': 26}, {'@': 27}, {'@': 28}, {'@': 29}, {'@': 30}, {'@': 31}, {'@': 32}], 'start': ['start'], 'parser_type': 'lalr', '__type__': 'ParserConf'}, 'parser': {'tokens': {0: 'RBRACE', 1: 'COMMA', 2: '__array_star_0', 3: 'RSQB', 4: 'COLON', 5: '$END', 6: 'LSQB', 7: 'value', 8: 'string', 9: 'LBRACE', 10: 'TRUE', 11: 'ESCAPED_STRING', 12: 'array', 13: 'object', 14: 'NULL', 15: 'SIGNED_NUMBER', 16: 'FALSE', 17: 'item', 18: '_items', 19: 'start', 20: '___items_star_1'}, 'states': {0: {0: (1, {'@': 32}), 1: (1, {'@': 32})}, 1: {2: (0, 3), 1: (0, 4), 3: (0, 24)}, 2: {3: (1, {'@': 28}), 1: (1, {'@': 28}), 0: (1, {'@': 28}), 4: (1, {'@': 28}), 5: (1, {'@': 28})}, 3: {1: (0, 12), 3: (0, 23)}, 4: {6: (0, 9), 7: (0, 31), 8: (0, 10), 9: (0, 16), 10: (0, 6), 11: (0, 2), 12: (0, 17), 13: (0, 32), 14: (0, 28), 15: (0, 8), 16: (0, 20)}, 5: {}, 6: {3: (1, {'@': 17}), 1: (1, {'@': 17}), 0: (1, {'@': 17}), 5: (1, {'@': 17})}, 7: {3: (1, {'@': 22}), 1: (1, {'@': 22}), 0: (1, {'@': 22}), 5: (1, {'@': 22})}, 8: {3: (1, {'@': 16}), 1: (1, {'@': 16}), 0: (1, {'@': 16}), 5: (1, {'@': 16})}, 9: {7: (0, 1), 8: (0, 10), 10: (0, 6), 6: (0, 9), 12: (0, 17), 13: (0, 32), 15: (0, 8), 3: (0, 7), 9: (0, 16), 11: (0, 2), 14: (0, 28), 16: (0, 20)}, 10: {3: (1, {'@': 15}), 1: (1, {'@': 15}), 0: (1, {'@': 15}), 5: (1, {'@': 15})}, 11: {3: (1, {'@': 24}), 1: (1, {'@': 24}), 0: (1, {'@': 24}), 5: (1, {'@': 24})}, 12: {6: (0, 9), 7: (0, 29), 8: (0, 10), 9: (0, 16), 10: (0, 6), 11: (0, 2), 12: (0, 17), 13: (0, 32), 14: (0, 28), 15: (0, 8), 16: (0, 20)}, 13: {6: (0, 9), 8: (0, 10), 9: (0, 16), 10: (0, 6), 11: (0, 2), 7: (0, 15), 12: (0, 17), 13: (0, 32), 14: (0, 28), 15: (0, 8), 16: (0, 20)}, 14: {0: (1, {'@': 31}), 1: (1, {'@': 31})}, 15: {0: (1, {'@': 25}), 1: (1, {'@': 25})}, 16: {0: (0, 11), 17: (0, 30), 8: (0, 18), 11: (0, 2), 18: (0, 19)}, 17: {3: (1, {'@': 14}), 1: (1, {'@': 14}), 0: (1, {'@': 14}), 5: (1, {'@': 14})}, 18: {4: (0, 13)}, 19: {0: (0, 21)}, 20: {3: (1, {'@': 18}), 1: (1, {'@': 18}), 0: (1, {'@': 18}), 5: (1, {'@': 18})}, 21: {3: (1, {'@': 23}), 1: (1, {'@': 23}), 0: (1, {'@': 23}), 5: (1, {'@': 23})}, 22: {1: (0, 27), 0: (1, {'@': 26})}, 23: {3: (1, {'@': 20}), 1: (1, {'@': 20}), 0: (1, {'@': 20}), 5: (1, {'@': 20})}, 24: {3: (1, {'@': 21}), 1: (1, {'@': 21}), 0: (1, {'@': 21}), 5: (1, {'@': 21})}, 25: {5: (1, {'@': 12})}, 26: {6: (0, 9), 8: (0, 10), 19: (0, 5), 9: (0, 16), 10: (0, 6), 11: (0, 2), 7: (0, 25), 12: (0, 17), 13: (0, 32), 14: (0, 28), 15: (0, 8), 16: (0, 20)}, 27: {8: (0, 18), 17: (0, 0), 11: (0, 2)}, 28: {3: (1, {'@': 19}), 1: (1, {'@': 19}), 0: (1, {'@': 19}), 5: (1, {'@': 19})}, 29: {3: (1, {'@': 30}), 1: (1, {'@': 30})}, 30: {1: (0, 33), 20: (0, 22), 0: (1, {'@': 27})}, 31: {3: (1, {'@': 29}), 1: (1, {'@': 29})}, 32: {3: (1, {'@': 13}), 1: (1, {'@': 13}), 0: (1, {'@': 13}), 5: (1, {'@': 13})}, 33: {8: (0, 18), 17: (0, 14), 11: (0, 2)}}, 'start_states': {'start': 26}, 'end_states': {'start': 5}}, '__type__': 'ParsingFrontend'}, 'rules': [{'@': 12}, {'@': 13}, {'@': 14}, {'@': 15}, {'@': 16}, {'@': 17}, {'@': 18}, {'@': 19}, {'@': 20}, {'@': 21}, 
{'@': 22}, {'@': 23}, {'@': 24}, {'@': 25}, {'@': 26}, {'@': 27}, {'@': 28}, {'@': 29}, {'@': 30}, {'@': 31}, {'@': 32}], 'options': {'debug': False, 'keep_all_tokens': False, 'tree_class': None, 'cache': False, 'postlex': None, 'parser': 'lalr', 'lexer': 'contextual', 'transformer': None, 'start': ['start'], 'priority': 'normal', 'ambiguity': 'auto', 'regex': False, 'propagate_positions': True, 'lexer_callbacks': {}, 'maybe_placeholders': False, 'edit_terminals': None, 'g_regex_flags': 0, 'use_bytes': False, 'import_paths': [], 'source_path': None, '_plugins': {}}, '__type__': 'Lark'}
3063)
3064MEMO = (
3065{0: {'name': 'SIGNED_NUMBER', 'pattern': {'value': '(?:(?:\\+|\\-))?(?:(?:(?:[0-9])+(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+|(?:(?:[0-9])+\\.(?:(?:[0-9])+)?|\\.(?:[0-9])+)(?:(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+)?)|(?:[0-9])+)', 'flags': [], '_width': [1, 4294967295], '__type__': 'PatternRE'}, 'priority': 0, '__type__': 'TerminalDef'}, 1: {'name': 'ESCAPED_STRING', 'pattern': {'value': '".*?(?<!\\\\)(\\\\\\\\)*?"', 'flags': [], '_width': [2, 4294967295], '__type__': 'PatternRE'}, 'priority': 0, '__type__': 'TerminalDef'}, 2: {'name': 'WS', 'pattern': {'value': '(?:[ \t\x0c\r\n])+', 'flags': [], '_width': [1, 4294967295], '__type__': 'PatternRE'}, 'priority': 0, '__type__': 'TerminalDef'}, 3: {'name': 'COLON', 'pattern': {'value': ':', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 4: {'name': 'TRUE', 'pattern': {'value': 'true', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 5: {'name': 'FALSE', 'pattern': {'value': 'false', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 6: {'name': 'NULL', 'pattern': {'value': 'null', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 7: {'name': 'COMMA', 'pattern': {'value': ',', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 8: {'name': 'LSQB', 'pattern': {'value': '[', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 9: {'name': 'RSQB', 'pattern': {'value': ']', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 10: {'name': 'LBRACE', 'pattern': {'value': '{', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 11: {'name': 'RBRACE', 'pattern': {'value': '}', 'flags': [], '__type__': 'PatternStr'}, 'priority': 0, '__type__': 'TerminalDef'}, 12: {'origin': {'name': Token('RULE', 'start'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'value', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 13: {'origin': {'name': Token('RULE', 'value'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'object', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 14: {'origin': {'name': Token('RULE', 'value'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'array', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 15: {'origin': {'name': Token('RULE', 'value'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'string', '__type__': 'NonTerminal'}], 'order': 2, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 16: {'origin': {'name': Token('RULE', 'value'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'SIGNED_NUMBER', 'filter_out': False, '__type__': 'Terminal'}], 'order': 3, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 
'Rule'}, 17: {'origin': {'name': Token('RULE', 'value'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'TRUE', 'filter_out': True, '__type__': 'Terminal'}], 'order': 4, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 18: {'origin': {'name': Token('RULE', 'value'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'FALSE', 'filter_out': True, '__type__': 'Terminal'}], 'order': 5, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 19: {'origin': {'name': Token('RULE', 'value'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'NULL', 'filter_out': True, '__type__': 'Terminal'}], 'order': 6, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 20: {'origin': {'name': Token('RULE', 'array'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'LSQB', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'value', '__type__': 'NonTerminal'}, {'name': '__array_star_0', '__type__': 'NonTerminal'}, {'name': 'RSQB', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 21: {'origin': {'name': Token('RULE', 'array'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'LSQB', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'value', '__type__': 'NonTerminal'}, {'name': 'RSQB', 'filter_out': True, '__type__': 'Terminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 22: {'origin': {'name': Token('RULE', 'array'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'LSQB', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'RSQB', 'filter_out': True, '__type__': 'Terminal'}], 'order': 2, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': [False, True, False], '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 23: {'origin': {'name': Token('RULE', 'object'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'LBRACE', 'filter_out': True, '__type__': 'Terminal'}, {'name': '_items', '__type__': 'NonTerminal'}, {'name': 'RBRACE', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 24: {'origin': {'name': Token('RULE', 'object'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'LBRACE', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'RBRACE', 'filter_out': True, '__type__': 'Terminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 25: {'origin': {'name': Token('RULE', 'item'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'string', '__type__': 'NonTerminal'}, {'name': 'COLON', 'filter_out': False, '__type__': 'Terminal'}, {'name': 'value', 
'__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 26: {'origin': {'name': Token('RULE', '_items'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'item', '__type__': 'NonTerminal'}, {'name': '___items_star_1', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 27: {'origin': {'name': Token('RULE', '_items'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'item', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 28: {'origin': {'name': Token('RULE', 'string'), '__type__': 'NonTerminal'}, 'expansion': [{'name': 'ESCAPED_STRING', 'filter_out': False, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 29: {'origin': {'name': '__array_star_0', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'value', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 30: {'origin': {'name': '__array_star_0', '__type__': 'NonTerminal'}, 'expansion': [{'name': '__array_star_0', '__type__': 'NonTerminal'}, {'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'value', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 31: {'origin': {'name': '___items_star_1', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'item', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 32: {'origin': {'name': '___items_star_1', '__type__': 'NonTerminal'}, 'expansion': [{'name': '___items_star_1', '__type__': 'NonTerminal'}, {'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'item', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'template_source': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}}
3066)
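# The integer codes below mirror the action encoding used by ParseTable.serialize()/deserialize()
# above: 0 stands for Shift and 1 for Reduce.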
3067Shift = 0
3068Reduce = 1
3069def Lark_StandAlone(**kwargs):
3070 return Lark._load_from_dict(DATA, MEMO, **kwargs)
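
# Usage sketch (illustrative, not emitted by the generator). The tables in DATA appear to describe a
# JSON-like grammar (object/array/string/number/true/false/null), so a consumer of this module would
# typically do something like the following; the function name and input string are only examples,
# and the keyword argument must be one of _LOAD_ALLOWED_OPTIONS.
def _example_usage():
    # build the parser from the embedded tables, optionally overriding a load-time option
    parser = Lark_StandAlone(propagate_positions=True)
    # parse a small JSON-style document and return the resulting tree
    return parser.parse('{"answer": [42, 3.14, true, null, "text"]}')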