1from __future__ import annotations
2
3import re
4from typing import TYPE_CHECKING
5
6from .inner import KApply, KLabel, KSequence, KToken, KVariable
7from .lexer import TokenType, lexer
8
9if TYPE_CHECKING:
10 from collections.abc import Iterable, Iterator
11 from typing import Final
12
13 from . import KInner
14 from .lexer import Token
15
16
# Short alias so the parser below can refer to token types concisely (e.g. TT.EOF).
TT = TokenType
18
19
class KAstParser:
    """Recursive-descent parser for KAST terms in their textual syntax.

    Consumes the token stream produced by :func:`lexer`, keeping a single
    token of lookahead in ``_la``.
    """

    _it: Iterator[Token]  # underlying token stream
    _la: Token  # current lookahead token

    def __init__(self, it: Iterable[str]):
        self._it = lexer(it)
        self._la = next(self._it)

    def _consume(self) -> str:
        """Advance past the lookahead token and return its text."""
        text = self._la.text
        self._la = next(self._it)
        return text

    def _match(self, expected: TokenType) -> str:
        """Like :meth:`_consume`, but require the lookahead to have type *expected*."""
        if self._la.type is not expected:
            raise self._unexpected_token(self._la, [expected])
        return self._consume()

    @staticmethod
    def _unexpected_token(token: Token, expected: Iterable[TokenType] = ()) -> ValueError:
        """Build (but do not raise) a parse error describing *token*.

        The expected types, if any, are listed in alphabetical order.
        """
        names = sorted(typ.name for typ in expected)

        if not names:
            return ValueError(f'Unexpected token: {token.text!r}')

        if len(names) == 1:
            return ValueError(f'Unexpected token: {token.text!r}. Expected: {names[0]}')

        return ValueError(f'Unexpected token: {token.text!r}. Expected one of: {", ".join(names)}')

    def eof(self) -> bool:
        """Return whether the entire input has been consumed."""
        return self._la.type is TT.EOF

    def k(self) -> KInner:
        """Parse a K sequence: ``DOTK`` or one or more items joined by ``KSEQ``."""
        if self._la.type is TT.DOTK:
            self._consume()
            return KSequence()

        items = [self.kitem()]
        while self._la.type is TT.KSEQ:
            self._consume()
            items.append(self.kitem())

        # A lone item is returned bare rather than wrapped in a KSequence.
        return KSequence(items) if len(items) > 1 else items[0]

    def kitem(self) -> KInner:
        """Parse a single K item: a variable, a token literal, or an application."""
        if self._la.type is TT.VARIABLE:
            name = self._consume()
            sort: str | None = None
            # Optional sort ascription: VARIABLE ":" SORT
            if self._la.type is TT.COLON:
                self._consume()
                sort = self._match(TT.SORT)
            return KVariable(name, sort)

        if self._la.type is TT.TOKEN:
            # TOKEN "(" STRING "," STRING ")" — text first, then sort.
            self._consume()
            self._match(TT.LPAREN)
            token = _unquote(self._match(TT.STRING))
            self._match(TT.COMMA)
            sort = _unquote(self._match(TT.STRING))
            self._match(TT.RPAREN)
            return KToken(token, sort)

        if self._la.type in (TT.ID, TT.KLABEL):
            # label "(" klist ")"
            label = self.klabel()
            self._match(TT.LPAREN)
            args = self.klist()
            self._match(TT.RPAREN)
            return KApply(label, args)

        raise self._unexpected_token(self._la, [TT.VARIABLE, TT.TOKEN, TT.ID, TT.KLABEL])

    def klabel(self) -> KLabel:
        """Parse a label: a plain identifier, or a quoted KLABEL token (unquoted here)."""
        if self._la.type is TT.KLABEL:
            return KLabel(_unquote(self._consume()))
        if self._la.type is TT.ID:
            return KLabel(self._consume())
        raise self._unexpected_token(self._la, [TT.ID, TT.KLABEL])

    def klist(self) -> list[KInner]:
        """Parse an argument list: ``DOTKLIST`` or comma-separated K sequences."""
        if self._la.type is TT.DOTKLIST:
            self._consume()
            return []

        res = [self.k()]
        while self._la.type is TT.COMMA:
            self._consume()
            res.append(self.k())
        return res
121
# Matches a backslash followed by any single (escaped) character.
_UNQUOTE_PATTERN: Final = re.compile(r'\\.')


def _unquote(s: str) -> str:
    """Strip the surrounding quote characters from *s* and resolve backslash escapes.

    Each two-character escape sequence ``\\c`` is replaced by the bare character ``c``.
    """
    body = s[1:-1]  # drop the opening and closing quote characters
    return _UNQUOTE_PATTERN.sub(lambda match: match.group()[1:], body)