Source code for pyk.kast.parser

  1from __future__ import annotations
  2
  3import re
  4from typing import TYPE_CHECKING
  5
  6from .inner import KApply, KLabel, KSequence, KToken, KVariable
  7from .lexer import TokenType, lexer
  8
  9if TYPE_CHECKING:
 10    from collections.abc import Iterable, Iterator
 11    from typing import Final
 12
 13    from . import KInner
 14    from .lexer import Token
 15
 16
 17TT = TokenType
 18
 19
class KAstParser:
    """Recursive-descent parser for the textual KAST format.

    Consumes a stream of :class:`Token` produced by :func:`lexer` with a
    single token of lookahead (``_la``) and builds :class:`KInner` terms.
    """

    _it: Iterator[Token]  # underlying token stream
    _la: Token  # one-token lookahead

    def __init__(self, it: Iterable[str]):
        self._it = lexer(it)
        self._la = next(self._it)

    def _consume(self) -> str:
        """Advance past the lookahead token and return its text."""
        consumed = self._la.text
        self._la = next(self._it)
        return consumed

    def _match(self, expected: TokenType) -> str:
        """Consume the lookahead if it has type *expected*, else raise."""
        if self._la.type is not expected:
            raise self._unexpected_token(self._la, [expected])
        return self._consume()

    @staticmethod
    def _unexpected_token(token: Token, expected: Iterable[TokenType] = ()) -> ValueError:
        """Build (not raise) a descriptive error for an unexpected token."""
        candidates = sorted(expected, key=lambda typ: typ.name)

        if not candidates:
            return ValueError(f'Unexpected token: {token.text!r}')

        if len(candidates) == 1:
            typ = candidates[0]
            return ValueError(f'Unexpected token: {token.text!r}. Expected: {typ.name}')

        type_str = ', '.join(typ.name for typ in candidates)
        return ValueError(f'Unexpected token: {token.text!r}. Expected one of: {type_str}')

    def eof(self) -> bool:
        """Whether the whole input has been consumed."""
        return self._la.type is TT.EOF

    def k(self) -> KInner:
        """Parse a K term: ``.K``, a single item, or a ``~>`` sequence."""
        if self._la.type is TT.DOTK:
            self._consume()
            return KSequence()

        first = self.kitem()
        rest: list[KInner] = []
        while self._la.type is TT.KSEQ:
            self._consume()
            rest.append(self.kitem())

        # A lone item is returned as-is rather than wrapped in a KSequence.
        if not rest:
            return first

        return KSequence([first, *rest])

    def kitem(self) -> KInner:
        """Parse one K item: a variable, a ``#token(...)``, or an application."""
        la_type = self._la.type

        if la_type is TT.VARIABLE:
            name = self._consume()
            sort: str | None = None
            # Optional sort ascription: ``X:Sort``.
            if self._la.type is TT.COLON:
                self._consume()
                sort = self._match(TT.SORT)
            return KVariable(name, sort)

        if la_type is TT.TOKEN:
            self._consume()
            self._match(TT.LPAREN)
            token = _unquote(self._match(TT.STRING))
            self._match(TT.COMMA)
            sort = _unquote(self._match(TT.STRING))
            self._match(TT.RPAREN)
            return KToken(token, sort)

        if la_type is TT.ID or la_type is TT.KLABEL:
            label = self.klabel()
            self._match(TT.LPAREN)
            args = self.klist()
            self._match(TT.RPAREN)
            return KApply(label, args)

        raise self._unexpected_token(self._la, [TT.VARIABLE, TT.TOKEN, TT.ID, TT.KLABEL])

    def klabel(self) -> KLabel:
        """Parse a label: a bare identifier or a backquoted ``KLABEL``."""
        la_type = self._la.type
        if la_type is TT.ID:
            return KLabel(self._consume())
        if la_type is TT.KLABEL:
            # Quoted labels carry their delimiters; strip and unescape them.
            return KLabel(_unquote(self._consume()))
        raise self._unexpected_token(self._la, [TT.ID, TT.KLABEL])

    def klist(self) -> list[KInner]:
        """Parse a comma-separated argument list, or ``.KList`` for empty."""
        if self._la.type is TT.DOTKLIST:
            self._consume()
            return []

        terms = [self.k()]
        while self._la.type is TT.COMMA:
            self._consume()
            terms.append(self.k())
        return terms
120 121 122_UNQUOTE_PATTERN: Final = re.compile(r'\\.') 123 124 125def _unquote(s: str) -> str: 126 return _UNQUOTE_PATTERN.sub(lambda m: m.group(0)[1], s[1:-1])