1from __future__ import annotations
2
3import re
4from typing import TYPE_CHECKING
5
6from .inner import KApply, KLabel, KSequence, KToken, KVariable
7from .lexer import TokenType, lexer
8
9if TYPE_CHECKING:
10 from collections.abc import Iterable, Iterator
11 from typing import Final
12
13 from . import KInner
14 from .lexer import Token
15
16
# Short alias so the parser below can refer to token types concisely (e.g. TT.EOF).
TT = TokenType
18
19
class KAstParser:
    """Recursive-descent parser for KAST terms in their textual syntax.

    Consumes the token stream produced by :func:`lexer`, keeping a single
    token of lookahead in ``_la``.
    """

    _it: Iterator[Token]  # underlying token stream
    _la: Token  # current lookahead token

    def __init__(self, it: Iterable[str]):
        self._it = lexer(it)
        self._la = next(self._it)

    def _consume(self) -> str:
        """Advance past the lookahead token and return its text."""
        text = self._la.text
        self._la = next(self._it)
        return text

    def _match(self, expected: TokenType) -> str:
        """Like :meth:`_consume`, but require the lookahead to have type *expected*."""
        if self._la.type is not expected:
            raise self._unexpected_token(self._la, [expected])
        return self._consume()

    @staticmethod
    def _unexpected_token(token: Token, expected: Iterable[TokenType] = ()) -> ValueError:
        """Build (but do not raise) a parse error describing *token*.

        The expected types, if any, are listed in alphabetical order.
        """
        names = sorted(typ.name for typ in expected)

        if not names:
            return ValueError(f'Unexpected token: {token.text!r}')

        if len(names) == 1:
            return ValueError(f'Unexpected token: {token.text!r}. Expected: {names[0]}')

        return ValueError(f'Unexpected token: {token.text!r}. Expected one of: {", ".join(names)}')

    def eof(self) -> bool:
        """Return whether the entire input has been consumed."""
        return self._la.type is TT.EOF

    def k(self) -> KInner:
        """Parse a K sequence: ``DOTK`` or one or more items joined by ``KSEQ``."""
        if self._la.type is TT.DOTK:
            self._consume()
            return KSequence()

        items = [self.kitem()]
        while self._la.type is TT.KSEQ:
            self._consume()
            items.append(self.kitem())

        # A lone item is returned bare rather than wrapped in a KSequence.
        return KSequence(items) if len(items) > 1 else items[0]

    def kitem(self) -> KInner:
        """Parse a single K item: a variable, a token literal, or an application."""
        if self._la.type is TT.VARIABLE:
            name = self._consume()
            sort: str | None = None
            # Optional sort ascription: VARIABLE ":" SORT
            if self._la.type is TT.COLON:
                self._consume()
                sort = self._match(TT.SORT)
            return KVariable(name, sort)

        if self._la.type is TT.TOKEN:
            # TOKEN "(" STRING "," STRING ")" — text first, then sort.
            self._consume()
            self._match(TT.LPAREN)
            token = _unquote(self._match(TT.STRING))
            self._match(TT.COMMA)
            sort = _unquote(self._match(TT.STRING))
            self._match(TT.RPAREN)
            return KToken(token, sort)

        if self._la.type in (TT.ID, TT.KLABEL):
            # label "(" klist ")"
            label = self.klabel()
            self._match(TT.LPAREN)
            args = self.klist()
            self._match(TT.RPAREN)
            return KApply(label, args)

        raise self._unexpected_token(self._la, [TT.VARIABLE, TT.TOKEN, TT.ID, TT.KLABEL])

    def klabel(self) -> KLabel:
        """Parse a label: a plain identifier, or a quoted KLABEL token (unquoted here)."""
        if self._la.type is TT.KLABEL:
            return KLabel(_unquote(self._consume()))
        if self._la.type is TT.ID:
            return KLabel(self._consume())
        raise self._unexpected_token(self._la, [TT.ID, TT.KLABEL])

    def klist(self) -> list[KInner]:
        """Parse an argument list: ``DOTKLIST`` or comma-separated K sequences."""
        if self._la.type is TT.DOTKLIST:
            self._consume()
            return []

        res = [self.k()]
        while self._la.type is TT.COMMA:
            self._consume()
            res.append(self.k())
        return res
121
# Matches a backslash followed by any single (escaped) character.
_UNQUOTE_PATTERN: Final = re.compile(r'\\.')


def _unquote(s: str) -> str:
    """Strip the surrounding quote characters from *s* and resolve backslash escapes.

    Each two-character escape sequence ``\\c`` is replaced by the bare character ``c``.
    """
    body = s[1:-1]  # drop the opening and closing quote characters
    return _UNQUOTE_PATTERN.sub(lambda match: match.group()[1:], body)