Source code for pyk.kore.parser

from __future__ import annotations

from typing import TYPE_CHECKING

from ..dequote import dequote_string
from .lexer import TokenType, kore_lexer
from .syntax import (
    DV,
    AliasDecl,
    And,
    App,
    Axiom,
    Bottom,
    Ceil,
    Claim,
    Definition,
    Equals,
    EVar,
    Exists,
    Floor,
    Forall,
    Iff,
    Implies,
    Import,
    In,
    LeftAssoc,
    Module,
    Mu,
    Next,
    Not,
    Nu,
    Or,
    Rewrites,
    RightAssoc,
    SortApp,
    SortDecl,
    SortVar,
    String,
    SVar,
    Symbol,
    SymbolDecl,
    Top,
)

if TYPE_CHECKING:
    from collections.abc import Callable, Iterator
    from typing import Final, TypeVar, Union

    from .lexer import KoreToken
    from .syntax import (
        Assoc,
        BinaryConn,
        BinaryPred,
        MLFixpoint,
        MLPattern,
        MLQuant,
        MultiaryConn,
        NullaryConn,
        Pattern,
        RoundPred,
        Sentence,
        Sort,
        UnaryConn,
        VarPattern,
    )

    NC = TypeVar('NC', bound=NullaryConn)
    UC = TypeVar('UC', bound=Union[UnaryConn, Next])
    BC = TypeVar('BC', bound=Union[BinaryConn, Rewrites])
    MC = TypeVar('MC', bound=MultiaryConn)
    QF = TypeVar('QF', bound=MLQuant)
    FP = TypeVar('FP', bound=MLFixpoint)
    RP = TypeVar('RP', bound=RoundPred)
    BP = TypeVar('BP', bound=BinaryPred)
    AS = TypeVar('AS', bound=Assoc)

    T = TypeVar('T')

class KoreParser:
    _ML_SYMBOLS: Final = {
        TokenType.ML_TOP: 'top',
        TokenType.ML_BOTTOM: 'bottom',
        TokenType.ML_NOT: 'nott',
        TokenType.ML_AND: 'andd',
        TokenType.ML_OR: 'orr',
        TokenType.ML_IMPLIES: 'implies',
        TokenType.ML_IFF: 'iff',
        TokenType.ML_EXISTS: 'exists',
        TokenType.ML_FORALL: 'forall',
        TokenType.ML_MU: 'mu',
        TokenType.ML_NU: 'nu',
        TokenType.ML_CEIL: 'ceil',
        TokenType.ML_FLOOR: 'floor',
        TokenType.ML_EQUALS: 'equals',
        TokenType.ML_IN: 'inn',
        TokenType.ML_NEXT: 'next',
        TokenType.ML_REWRITES: 'rewrites',
        TokenType.ML_DV: 'dv',
        TokenType.ML_LEFT_ASSOC: 'left_assoc',
        TokenType.ML_RIGHT_ASSOC: 'right_assoc',
    }

    _SENTENCE_KWS: Final = {
        TokenType.KW_IMPORT: 'importt',
        TokenType.KW_SORT: 'sort_decl',
        TokenType.KW_HOOKED_SORT: 'hooked_sort_decl',
        TokenType.KW_SYMBOL: 'symbol_decl',
        TokenType.KW_HOOKED_SYMBOL: 'hooked_symbol_decl',
        TokenType.KW_ALIAS: 'alias_decl',
        TokenType.KW_AXIOM: 'axiom',
        TokenType.KW_CLAIM: 'claim',
    }

    _iter: Iterator[KoreToken]
    _la: KoreToken  # single-token lookahead

    def __init__(self, text: str):
        self._iter = kore_lexer(text)
        self._la = next(self._iter)

    @property
    def eof(self) -> bool:
        return self._la.type == TokenType.EOF

    def _consume(self) -> str:
        # Return the lookahead token's text and advance to the next token
        text = self._la.text
        self._la = next(self._iter)
        return text

    def _match(self, token_type: TokenType) -> str:
        if self._la.type != token_type:
            raise ValueError(f'Expected {token_type.name}, found: {self._la.type.name}')

        return self._consume()

    def _delimited_list_of(
        self,
        parse: Callable[[], T],
        ldelim: TokenType,
        rdelim: TokenType,
        sep: TokenType = TokenType.COMMA,
    ) -> list[T]:
        res: list[T] = []

        self._match(ldelim)
        while self._la.type != rdelim:
            res.append(parse())
            if self._la.type != sep:
                break
            self._consume()  # consume the separator
        self._consume()  # consume the right delimiter

        return res

    def id(self) -> str:
        return self._match(TokenType.ID)

    def symbol_id(self) -> str:
        if self._la.type == TokenType.SYMBOL_ID:
            return self._consume()

        return self._match(TokenType.ID)

    def set_var_id(self) -> str:
        return self._match(TokenType.SET_VAR_ID)

    def sort(self) -> Sort:
        name = self.id()

        if self._la.type == TokenType.LBRACE:
            sorts = self._sort_list()
            return SortApp(name, sorts)

        return SortVar(name)

    def _sort_list(self) -> list[Sort]:
        return self._delimited_list_of(self.sort, TokenType.LBRACE, TokenType.RBRACE)

    def sort_var(self) -> SortVar:
        name = self._match(TokenType.ID)
        return SortVar(name)

    def sort_app(self) -> SortApp:
        name = self._match(TokenType.ID)
        sorts = self._sort_list()
        return SortApp(name, sorts)

    def pattern(self) -> Pattern:
        if self._la.type == TokenType.STRING:
            return self.string()

        if self._la.type in self._ML_SYMBOLS:
            return self.ml_pattern()

        if self._la.type == TokenType.SYMBOL_ID:
            return self.app()

        if self._la.type == TokenType.SET_VAR_ID:
            return self.set_var()

        name = self._match(TokenType.ID)
        if self._la.type == TokenType.COLON:
            self._consume()
            sort = self.sort()
            return EVar(name, sort)

        sorts = self._sort_list()
        patterns = self._pattern_list()
        return App(name, sorts, patterns)

    def _pattern_list(self) -> list[Pattern]:
        return self._delimited_list_of(self.pattern, TokenType.LPAREN, TokenType.RPAREN)

    def string(self) -> String:
        value = self._match(TokenType.STRING)
        return String(dequote_string(value[1:-1]))

    def app(self) -> App:
        symbol = self.symbol_id()
        sorts = self._sort_list()
        patterns = self._pattern_list()
        return App(symbol, sorts, patterns)

    def var_pattern(self) -> VarPattern:
        if self._la.type == TokenType.SET_VAR_ID:
            return self.set_var()

        return self.elem_var()

    def set_var(self) -> SVar:
        name = self._match(TokenType.SET_VAR_ID)
        self._match(TokenType.COLON)
        sort = self.sort()
        return SVar(name, sort)

    def elem_var(self) -> EVar:
        name = self._match(TokenType.ID)
        self._match(TokenType.COLON)
        sort = self.sort()
        return EVar(name, sort)

    def ml_pattern(self) -> MLPattern:
        token_type = self._la.type
        if token_type not in self._ML_SYMBOLS:
            raise ValueError(f'Expected matching logic symbol, found: {self._la.text}')
        # Dispatch to the parsing method registered for this symbol
        parse = getattr(self, self._ML_SYMBOLS[token_type])
        return parse()

    def _nullary(self, token_type: TokenType, cls: type[NC]) -> NC:
        self._match(token_type)
        self._match(TokenType.LBRACE)
        sort = self.sort()
        self._match(TokenType.RBRACE)
        self._match(TokenType.LPAREN)
        self._match(TokenType.RPAREN)
        # TODO Implement NullaryConn.create(symbol, sort) instead
        # TODO Consider MLConn.create(symbol, sort, patterns) as well
        return cls(sort)  # type: ignore

    def top(self) -> Top:
        return self._nullary(TokenType.ML_TOP, Top)

    def bottom(self) -> Bottom:
        return self._nullary(TokenType.ML_BOTTOM, Bottom)

    def _unary(self, token_type: TokenType, cls: type[UC]) -> UC:
        self._match(token_type)
        self._match(TokenType.LBRACE)
        sort = self.sort()
        self._match(TokenType.RBRACE)
        self._match(TokenType.LPAREN)
        pattern = self.pattern()
        self._match(TokenType.RPAREN)
        return cls(sort, pattern)  # type: ignore

    def nott(self) -> Not:
        return self._unary(TokenType.ML_NOT, Not)

    def _binary(self, token_type: TokenType, cls: type[BC]) -> BC:
        self._match(token_type)
        self._match(TokenType.LBRACE)
        sort = self.sort()
        self._match(TokenType.RBRACE)
        self._match(TokenType.LPAREN)
        left = self.pattern()
        self._match(TokenType.COMMA)
        right = self.pattern()
        self._match(TokenType.RPAREN)
        return cls(sort, left, right)  # type: ignore

    def implies(self) -> Implies:
        return self._binary(TokenType.ML_IMPLIES, Implies)

    def iff(self) -> Iff:
        return self._binary(TokenType.ML_IFF, Iff)

    def _multiary(self, token_type: TokenType, cls: type[MC]) -> MC:
        self._match(token_type)
        self._match(TokenType.LBRACE)
        sort = self.sort()
        self._match(TokenType.RBRACE)
        ops = self._delimited_list_of(self.pattern, TokenType.LPAREN, TokenType.RPAREN)
        return cls(sort, ops)  # type: ignore

    def andd(self) -> And:
        return self._multiary(TokenType.ML_AND, And)

    def orr(self) -> Or:
        return self._multiary(TokenType.ML_OR, Or)

    def _quantifier(self, token_type: TokenType, cls: type[QF]) -> QF:
        self._match(token_type)
        self._match(TokenType.LBRACE)
        sort = self.sort()
        self._match(TokenType.RBRACE)
        self._match(TokenType.LPAREN)
        var = self.elem_var()
        self._match(TokenType.COMMA)
        pattern = self.pattern()
        self._match(TokenType.RPAREN)
        return cls(sort, var, pattern)  # type: ignore

    def exists(self) -> Exists:
        return self._quantifier(TokenType.ML_EXISTS, Exists)

    def forall(self) -> Forall:
        return self._quantifier(TokenType.ML_FORALL, Forall)

    def _fixpoint(self, token_type: TokenType, cls: type[FP]) -> FP:
        self._match(token_type)
        self._match(TokenType.LBRACE)
        self._match(TokenType.RBRACE)
        self._match(TokenType.LPAREN)
        var = self.set_var()
        self._match(TokenType.COMMA)
        pattern = self.pattern()
        self._match(TokenType.RPAREN)
        return cls(var, pattern)  # type: ignore

    def mu(self) -> Mu:
        return self._fixpoint(TokenType.ML_MU, Mu)

    def nu(self) -> Nu:
        return self._fixpoint(TokenType.ML_NU, Nu)

    def _round_pred(self, token_type: TokenType, cls: type[RP]) -> RP:
        self._match(token_type)
        self._match(TokenType.LBRACE)
        op_sort = self.sort()
        self._match(TokenType.COMMA)
        sort = self.sort()
        self._match(TokenType.RBRACE)
        self._match(TokenType.LPAREN)
        pattern = self.pattern()
        self._match(TokenType.RPAREN)
        return cls(op_sort, sort, pattern)  # type: ignore

    def ceil(self) -> Ceil:
        return self._round_pred(TokenType.ML_CEIL, Ceil)

    def floor(self) -> Floor:
        return self._round_pred(TokenType.ML_FLOOR, Floor)

    def _binary_pred(self, token_type: TokenType, cls: type[BP]) -> BP:
        self._match(token_type)
        self._match(TokenType.LBRACE)
        left_sort = self.sort()
        self._match(TokenType.COMMA)
        right_sort = self.sort()
        self._match(TokenType.RBRACE)
        self._match(TokenType.LPAREN)
        left = self.pattern()
        self._match(TokenType.COMMA)
        right = self.pattern()
        self._match(TokenType.RPAREN)
        return cls(left_sort, right_sort, left, right)  # type: ignore

    def equals(self) -> Equals:
        return self._binary_pred(TokenType.ML_EQUALS, Equals)

    def inn(self) -> In:
        return self._binary_pred(TokenType.ML_IN, In)

    def next(self) -> Next:
        return self._unary(TokenType.ML_NEXT, Next)

    def rewrites(self) -> Rewrites:
        return self._binary(TokenType.ML_REWRITES, Rewrites)

    def dv(self) -> DV:
        self._match(TokenType.ML_DV)
        self._match(TokenType.LBRACE)
        sort = self.sort()
        self._match(TokenType.RBRACE)
        self._match(TokenType.LPAREN)
        value = self.string()
        self._match(TokenType.RPAREN)
        return DV(sort, value)

    def _assoc(self, token_type: TokenType, cls: type[AS]) -> AS:
        self._match(token_type)
        self._match(TokenType.LBRACE)
        self._match(TokenType.RBRACE)
        self._match(TokenType.LPAREN)
        app = self.app()
        self._match(TokenType.RPAREN)
        return cls(app.symbol, app.sorts, app.args)  # type: ignore

    def left_assoc(self) -> LeftAssoc:
        return self._assoc(TokenType.ML_LEFT_ASSOC, LeftAssoc)

    def right_assoc(self) -> RightAssoc:
        return self._assoc(TokenType.ML_RIGHT_ASSOC, RightAssoc)

    def _attr_list(self) -> list[App]:
        return self._delimited_list_of(self.app, TokenType.LBRACK, TokenType.RBRACK)

    def sentence(self) -> Sentence:
        token_type = self._la.type

        if token_type not in self._SENTENCE_KWS:
            raise ValueError(f'Expected {[kw.name for kw in self._SENTENCE_KWS]}, found: {token_type.name}')

        parse = getattr(self, self._SENTENCE_KWS[token_type])
        return parse()

    def importt(self) -> Import:
        self._match(TokenType.KW_IMPORT)
        module_name = self.id()
        attrs = self._attr_list()
        return Import(module_name, attrs)

    def sort_decl(self) -> SortDecl:
        self._match(TokenType.KW_SORT)
        name = self.id()
        vars = self._sort_var_list()
        attrs = self._attr_list()
        return SortDecl(name, vars, attrs, hooked=False)

    def hooked_sort_decl(self) -> SortDecl:
        self._match(TokenType.KW_HOOKED_SORT)
        name = self.id()
        vars = self._sort_var_list()
        attrs = self._attr_list()
        return SortDecl(name, vars, attrs, hooked=True)

    def _sort_var_list(self) -> list[SortVar]:
        return self._delimited_list_of(self.sort_var, TokenType.LBRACE, TokenType.RBRACE)

    def symbol_decl(self) -> SymbolDecl:
        self._match(TokenType.KW_SYMBOL)
        symbol = self.symbol()
        sort_params = self._sort_param_list()
        self._match(TokenType.COLON)
        sort = self.sort()
        attrs = self._attr_list()
        return SymbolDecl(symbol, sort_params, sort, attrs, hooked=False)

    def hooked_symbol_decl(self) -> SymbolDecl:
        self._match(TokenType.KW_HOOKED_SYMBOL)
        symbol = self.symbol()
        sort_params = self._sort_param_list()
        self._match(TokenType.COLON)
        sort = self.sort()
        attrs = self._attr_list()
        return SymbolDecl(symbol, sort_params, sort, attrs, hooked=True)

    def alias_decl(self) -> AliasDecl:
        self._match(TokenType.KW_ALIAS)
        symbol = self.symbol()
        sort_params = self._sort_param_list()
        self._match(TokenType.COLON)
        sort = self.sort()
        self._match(TokenType.KW_WHERE)
        left = self.app()
        self._match(TokenType.WALRUS)
        right = self.pattern()
        attrs = self._attr_list()
        return AliasDecl(symbol, sort_params, sort, left, right, attrs)

    def _sort_param_list(self) -> list[Sort]:
        return self._delimited_list_of(self.sort, TokenType.LPAREN, TokenType.RPAREN)

    # TODO remove once \left-assoc{}(\or{...}(...)) is no longer supported
    def multi_or(self) -> list[Pattern]:
        self._match(TokenType.ML_LEFT_ASSOC)
        self._match(TokenType.LBRACE)
        self._match(TokenType.RBRACE)
        self._match(TokenType.LPAREN)
        self._match(TokenType.ML_OR)
        self._match(TokenType.LBRACE)
        self.sort()
        self._match(TokenType.RBRACE)
        patterns = self._pattern_list()
        self._match(TokenType.RPAREN)
        return patterns

    def symbol(self) -> Symbol:
        name = self.symbol_id()
        vars = self._sort_var_list()
        return Symbol(name, vars)

    def axiom(self) -> Axiom:
        self._match(TokenType.KW_AXIOM)
        vars = self._sort_var_list()
        pattern = self.pattern()
        attrs = self._attr_list()
        return Axiom(vars, pattern, attrs)

    def claim(self) -> Claim:
        self._match(TokenType.KW_CLAIM)
        vars = self._sort_var_list()
        pattern = self.pattern()
        attrs = self._attr_list()
        return Claim(vars, pattern, attrs)

    def module(self) -> Module:
        self._match(TokenType.KW_MODULE)
        name = self.id()

        sentences: list[Sentence] = []
        while self._la.type != TokenType.KW_ENDMODULE:
            sentences.append(self.sentence())
        self._consume()

        attrs = self._attr_list()

        return Module(name, sentences, attrs)

    def definition(self) -> Definition:
        attrs = self._attr_list()

        modules: list[Module] = []
        while self._la.type != TokenType.EOF:
            modules.append(self.module())

        return Definition(modules, attrs)
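
A brief usage sketch (not part of the module source; the KORE text and sort names below are illustrative). Each public method parses one syntactic category from the front of the input, so `pattern()` yields a single Pattern while `definition()` consumes a whole KORE definition:

from pyk.kore.parser import KoreParser
from pyk.kore.syntax import DV, Equals

# Illustrative KORE text: \equals over an application and a domain value
parser = KoreParser(r'\equals{SortInt{}, SortBool{}}(size{}(X:SortList{}), \dv{SortInt{}}("1"))')
pattern = parser.pattern()
assert isinstance(pattern, Equals)
assert isinstance(pattern.right, DV)
assert parser.eof  # the parser consumed the entire input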