1from __future__ import annotations
2
3from typing import TYPE_CHECKING
4
5from ..dequote import dequote_string
6from .lexer import TokenType, kore_lexer
7from .syntax import (
8 DV,
9 AliasDecl,
10 And,
11 App,
12 Axiom,
13 Bottom,
14 Ceil,
15 Claim,
16 Definition,
17 Equals,
18 EVar,
19 Exists,
20 Floor,
21 Forall,
22 Iff,
23 Implies,
24 Import,
25 In,
26 LeftAssoc,
27 Module,
28 Mu,
29 Next,
30 Not,
31 Nu,
32 Or,
33 Rewrites,
34 RightAssoc,
35 SortApp,
36 SortDecl,
37 SortVar,
38 String,
39 SVar,
40 Symbol,
41 SymbolDecl,
42 Top,
43)
44
if TYPE_CHECKING:
    # These imports are needed only for type annotations. Deferring them to
    # type-checking time avoids runtime import cost and potential cycles;
    # `from __future__ import annotations` at the top of the file makes all
    # annotations lazy, so the names need not exist at runtime.
    from collections.abc import Callable, Iterator
    from typing import Final, TypeVar, Union

    from .lexer import KoreToken
    from .syntax import (
        Assoc,
        BinaryConn,
        BinaryPred,
        MLFixpoint,
        MLPattern,
        MLQuant,
        MultiaryConn,
        NullaryConn,
        Pattern,
        RoundPred,
        Sentence,
        Sort,
        UnaryConn,
        VarPattern,
    )

    # Type variables bounding KoreParser's generic helper methods
    # (_nullary, _unary, _binary, ...) to the matching family of AST
    # constructors, so each helper returns the concrete class it was given.
    NC = TypeVar('NC', bound=NullaryConn)
    UC = TypeVar('UC', bound=Union[UnaryConn, Next])
    BC = TypeVar('BC', bound=Union[BinaryConn, Rewrites])
    MC = TypeVar('MC', bound=MultiaryConn)
    QF = TypeVar('QF', bound=MLQuant)
    FP = TypeVar('FP', bound=MLFixpoint)
    RP = TypeVar('RP', bound=RoundPred)
    BP = TypeVar('BP', bound=BinaryPred)
    AS = TypeVar('AS', bound=Assoc)

    # Element type for _delimited_list_of.
    T = TypeVar('T')
79
class KoreParser:
    """Recursive descent (LL(1)) parser for the KORE language.

    Wraps :func:`kore_lexer` with a single token of lookahead (``_la``).
    Each public method parses exactly one grammar production, consumes the
    corresponding tokens, and returns the matching AST node from
    ``.syntax``. A :class:`ValueError` is raised on any unexpected token.
    """

    # Matching logic symbol token -> name of the parser method handling it.
    # Some method names carry a doubled final letter ('nott', 'andd', 'orr',
    # 'inn') to avoid colliding with Python keywords.
    _ML_SYMBOLS: Final = {
        TokenType.ML_TOP: 'top',
        TokenType.ML_BOTTOM: 'bottom',
        TokenType.ML_NOT: 'nott',
        TokenType.ML_AND: 'andd',
        TokenType.ML_OR: 'orr',
        TokenType.ML_IMPLIES: 'implies',
        TokenType.ML_IFF: 'iff',
        TokenType.ML_EXISTS: 'exists',
        TokenType.ML_FORALL: 'forall',
        TokenType.ML_MU: 'mu',
        TokenType.ML_NU: 'nu',
        TokenType.ML_CEIL: 'ceil',
        TokenType.ML_FLOOR: 'floor',
        TokenType.ML_EQUALS: 'equals',
        TokenType.ML_IN: 'inn',
        TokenType.ML_NEXT: 'next',
        TokenType.ML_REWRITES: 'rewrites',
        TokenType.ML_DV: 'dv',
        TokenType.ML_LEFT_ASSOC: 'left_assoc',
        TokenType.ML_RIGHT_ASSOC: 'right_assoc',
    }

    # Sentence keyword token -> name of the parser method handling it
    # ('importt' is doubled to avoid the 'import' keyword).
    _SENTENCE_KWS: Final = {
        TokenType.KW_IMPORT: 'importt',
        TokenType.KW_SORT: 'sort_decl',
        TokenType.KW_HOOKED_SORT: 'hooked_sort_decl',
        TokenType.KW_SYMBOL: 'symbol_decl',
        TokenType.KW_HOOKED_SYMBOL: 'hooked_symbol_decl',
        TokenType.KW_ALIAS: 'alias_decl',
        TokenType.KW_AXIOM: 'axiom',
        TokenType.KW_CLAIM: 'claim',
    }

    _iter: Iterator[KoreToken]  # token stream produced by the lexer
    _la: KoreToken  # lookahead: the next, not-yet-consumed token

    def __init__(self, text: str):
        self._iter = kore_lexer(text)
        self._la = next(self._iter)

    @property
    def eof(self) -> bool:
        """Whether the entire input has been consumed."""
        return self._la.type == TokenType.EOF

    def _consume(self) -> str:
        """Unconditionally consume the lookahead token and return its text."""
        text = self._la.text
        self._la = next(self._iter)
        return text

    def _match(self, token_type: TokenType) -> str:
        """Consume the lookahead token, requiring it to be of ``token_type``.

        Raises:
            ValueError: if the lookahead token has a different type.
        """
        if self._la.type != token_type:
            raise ValueError(f'Expected {token_type.name}, found: {self._la.type.name}')

        return self._consume()

    def _delimited_list_of(
        self,
        parse: Callable[[], T],
        ldelim: TokenType,
        rdelim: TokenType,
        sep: TokenType = TokenType.COMMA,
    ) -> list[T]:
        """Parse a possibly empty ``sep``-separated list enclosed in delimiters.

        Args:
            parse: parser for a single list element.
            ldelim: opening delimiter token type.
            rdelim: closing delimiter token type.
            sep: separator token type (comma by default).
        """
        res: list[T] = []

        self._match(ldelim)
        while self._la.type != rdelim:
            res.append(parse())
            if self._la.type != sep:
                break
            self._consume()
        # Verify the closing delimiter explicitly: the loop may also have been
        # left via `break`, in which case the lookahead could be anything.
        # (Previously this was an unconditional _consume(), which silently
        # accepted malformed input such as a missing closing delimiter.)
        self._match(rdelim)

        return res

    def id(self) -> str:
        """Parse a plain identifier."""
        return self._match(TokenType.ID)

    def symbol_id(self) -> str:
        """Parse a symbol identifier (backslash-prefixed) or a plain one."""
        if self._la.type == TokenType.SYMBOL_ID:
            return self._consume()

        return self._match(TokenType.ID)

    def set_var_id(self) -> str:
        """Parse a set variable identifier (``@``-prefixed)."""
        return self._match(TokenType.SET_VAR_ID)

    def sort(self) -> Sort:
        """Parse a sort: ``Name{...}`` is a SortApp, a bare name a SortVar."""
        name = self.id()

        if self._la.type == TokenType.LBRACE:
            sorts = self._sort_list()
            return SortApp(name, sorts)

        return SortVar(name)

    def _sort_list(self) -> list[Sort]:
        return self._delimited_list_of(self.sort, TokenType.LBRACE, TokenType.RBRACE)

    def sort_var(self) -> SortVar:
        """Parse a sort variable."""
        name = self._match(TokenType.ID)
        return SortVar(name)

    def sort_app(self) -> SortApp:
        """Parse a sort application ``Name{Sort, ...}``."""
        name = self._match(TokenType.ID)
        sorts = self._sort_list()
        return SortApp(name, sorts)

    def pattern(self) -> Pattern:
        """Parse any pattern, dispatching on the lookahead token.

        Dispatch order: string literal, matching logic symbol, symbol
        application, set variable; otherwise a plain identifier starts either
        an element variable (``x:Sort``) or an application (``f{...}(...)``).
        """
        if self._la.type == TokenType.STRING:
            return self.string()

        if self._la.type in self._ML_SYMBOLS:
            return self.ml_pattern()

        if self._la.type == TokenType.SYMBOL_ID:
            return self.app()

        if self._la.type == TokenType.SET_VAR_ID:
            return self.set_var()

        name = self._match(TokenType.ID)
        if self._la.type == TokenType.COLON:
            self._consume()
            sort = self.sort()
            return EVar(name, sort)

        sorts = self._sort_list()
        patterns = self._pattern_list()
        return App(name, sorts, patterns)

    def _pattern_list(self) -> list[Pattern]:
        return self._delimited_list_of(self.pattern, TokenType.LPAREN, TokenType.RPAREN)

    def string(self) -> String:
        """Parse a string literal, stripping quotes and resolving escapes."""
        value = self._match(TokenType.STRING)
        return String(dequote_string(value[1:-1]))

    def app(self) -> App:
        """Parse a symbol application ``sym{Sort, ...}(Pattern, ...)``."""
        symbol = self.symbol_id()
        sorts = self._sort_list()
        patterns = self._pattern_list()
        return App(symbol, sorts, patterns)

    def var_pattern(self) -> VarPattern:
        """Parse a variable pattern: a set variable or an element variable."""
        if self._la.type == TokenType.SET_VAR_ID:
            return self.set_var()

        return self.elem_var()

    def set_var(self) -> SVar:
        """Parse a set variable ``@X:Sort``."""
        name = self._match(TokenType.SET_VAR_ID)
        self._match(TokenType.COLON)
        sort = self.sort()
        return SVar(name, sort)

    def elem_var(self) -> EVar:
        """Parse an element variable ``x:Sort``."""
        name = self._match(TokenType.ID)
        self._match(TokenType.COLON)
        sort = self.sort()
        return EVar(name, sort)

    def ml_pattern(self) -> MLPattern:
        """Parse a matching logic pattern, dispatching via ``_ML_SYMBOLS``."""
        token_type = self._la.type
        if token_type not in self._ML_SYMBOLS:
            raise ValueError(f'Expected matching logic symbol, found: {self._la.text}')
        parse = getattr(self, self._ML_SYMBOLS[token_type])
        return parse()

    def _nullary(self, token_type: TokenType, cls: type[NC]) -> NC:
        """Parse ``\\sym{Sort}()`` and construct ``cls(sort)``."""
        self._match(token_type)
        self._match(TokenType.LBRACE)
        sort = self.sort()
        self._match(TokenType.RBRACE)
        self._match(TokenType.LPAREN)
        self._match(TokenType.RPAREN)
        # TODO Implement NullaryConn.create(symbol, sort) instead
        # TODO Consider MLConn.create(symbol, sort, patterns) as well
        return cls(sort)  # type: ignore

    def top(self) -> Top:
        r"""Parse ``\top{Sort}()``."""
        return self._nullary(TokenType.ML_TOP, Top)

    def bottom(self) -> Bottom:
        r"""Parse ``\bottom{Sort}()``."""
        return self._nullary(TokenType.ML_BOTTOM, Bottom)

    def _unary(self, token_type: TokenType, cls: type[UC]) -> UC:
        """Parse ``\\sym{Sort}(Pattern)`` and construct ``cls(sort, pattern)``."""
        self._match(token_type)
        self._match(TokenType.LBRACE)
        sort = self.sort()
        self._match(TokenType.RBRACE)
        self._match(TokenType.LPAREN)
        pattern = self.pattern()
        self._match(TokenType.RPAREN)
        return cls(sort, pattern)  # type: ignore

    def nott(self) -> Not:
        r"""Parse ``\not{Sort}(Pattern)``."""
        return self._unary(TokenType.ML_NOT, Not)

    def _binary(self, token_type: TokenType, cls: type[BC]) -> BC:
        """Parse ``\\sym{Sort}(Pattern, Pattern)``."""
        self._match(token_type)
        self._match(TokenType.LBRACE)
        sort = self.sort()
        self._match(TokenType.RBRACE)
        self._match(TokenType.LPAREN)
        left = self.pattern()
        self._match(TokenType.COMMA)
        right = self.pattern()
        self._match(TokenType.RPAREN)
        return cls(sort, left, right)  # type: ignore

    def implies(self) -> Implies:
        r"""Parse ``\implies{Sort}(Pattern, Pattern)``."""
        return self._binary(TokenType.ML_IMPLIES, Implies)

    def iff(self) -> Iff:
        r"""Parse ``\iff{Sort}(Pattern, Pattern)``."""
        return self._binary(TokenType.ML_IFF, Iff)

    def _multiary(self, token_type: TokenType, cls: type[MC]) -> MC:
        """Parse ``\\sym{Sort}(Pattern, ...)`` with any number of operands."""
        self._match(token_type)
        self._match(TokenType.LBRACE)
        sort = self.sort()
        self._match(TokenType.RBRACE)
        ops = self._delimited_list_of(self.pattern, TokenType.LPAREN, TokenType.RPAREN)
        return cls(sort, ops)  # type: ignore

    def andd(self) -> And:
        r"""Parse ``\and{Sort}(Pattern, ...)``."""
        return self._multiary(TokenType.ML_AND, And)

    def orr(self) -> Or:
        r"""Parse ``\or{Sort}(Pattern, ...)``."""
        return self._multiary(TokenType.ML_OR, Or)

    def _quantifier(self, token_type: TokenType, cls: type[QF]) -> QF:
        """Parse ``\\sym{Sort}(x:Sort, Pattern)``."""
        self._match(token_type)
        self._match(TokenType.LBRACE)
        sort = self.sort()
        self._match(TokenType.RBRACE)
        self._match(TokenType.LPAREN)
        var = self.elem_var()
        self._match(TokenType.COMMA)
        pattern = self.pattern()
        self._match(TokenType.RPAREN)
        return cls(sort, var, pattern)  # type: ignore

    def exists(self) -> Exists:
        r"""Parse ``\exists{Sort}(x:Sort, Pattern)``."""
        return self._quantifier(TokenType.ML_EXISTS, Exists)

    def forall(self) -> Forall:
        r"""Parse ``\forall{Sort}(x:Sort, Pattern)``."""
        return self._quantifier(TokenType.ML_FORALL, Forall)

    def _fixpoint(self, token_type: TokenType, cls: type[FP]) -> FP:
        """Parse ``\\sym{}(@X:Sort, Pattern)`` — note the empty sort braces."""
        self._match(token_type)
        self._match(TokenType.LBRACE)
        self._match(TokenType.RBRACE)
        self._match(TokenType.LPAREN)
        var = self.set_var()
        self._match(TokenType.COMMA)
        pattern = self.pattern()
        self._match(TokenType.RPAREN)
        return cls(var, pattern)  # type: ignore

    def mu(self) -> Mu:
        r"""Parse ``\mu{}(@X:Sort, Pattern)``."""
        return self._fixpoint(TokenType.ML_MU, Mu)

    def nu(self) -> Nu:
        r"""Parse ``\nu{}(@X:Sort, Pattern)``."""
        return self._fixpoint(TokenType.ML_NU, Nu)

    def _round_pred(self, token_type: TokenType, cls: type[RP]) -> RP:
        """Parse ``\\sym{OpSort, Sort}(Pattern)``."""
        self._match(token_type)
        self._match(TokenType.LBRACE)
        op_sort = self.sort()
        self._match(TokenType.COMMA)
        sort = self.sort()
        self._match(TokenType.RBRACE)
        self._match(TokenType.LPAREN)
        pattern = self.pattern()
        self._match(TokenType.RPAREN)
        return cls(op_sort, sort, pattern)  # type: ignore

    def ceil(self) -> Ceil:
        r"""Parse ``\ceil{OpSort, Sort}(Pattern)``."""
        return self._round_pred(TokenType.ML_CEIL, Ceil)

    def floor(self) -> Floor:
        r"""Parse ``\floor{OpSort, Sort}(Pattern)``."""
        return self._round_pred(TokenType.ML_FLOOR, Floor)

    def _binary_pred(self, token_type: TokenType, cls: type[BP]) -> BP:
        """Parse ``\\sym{LeftSort, RightSort}(Pattern, Pattern)``."""
        self._match(token_type)
        self._match(TokenType.LBRACE)
        left_sort = self.sort()
        self._match(TokenType.COMMA)
        right_sort = self.sort()
        self._match(TokenType.RBRACE)
        self._match(TokenType.LPAREN)
        left = self.pattern()
        self._match(TokenType.COMMA)
        right = self.pattern()
        self._match(TokenType.RPAREN)
        return cls(left_sort, right_sort, left, right)  # type: ignore

    def equals(self) -> Equals:
        r"""Parse ``\equals{LeftSort, RightSort}(Pattern, Pattern)``."""
        return self._binary_pred(TokenType.ML_EQUALS, Equals)

    def inn(self) -> In:
        r"""Parse ``\in{LeftSort, RightSort}(Pattern, Pattern)``."""
        return self._binary_pred(TokenType.ML_IN, In)

    def next(self) -> Next:
        r"""Parse ``\next{Sort}(Pattern)``."""
        return self._unary(TokenType.ML_NEXT, Next)

    def rewrites(self) -> Rewrites:
        r"""Parse ``\rewrites{Sort}(Pattern, Pattern)``."""
        return self._binary(TokenType.ML_REWRITES, Rewrites)

    def dv(self) -> DV:
        r"""Parse a domain value ``\dv{Sort}("literal")``."""
        self._match(TokenType.ML_DV)
        self._match(TokenType.LBRACE)
        sort = self.sort()
        self._match(TokenType.RBRACE)
        self._match(TokenType.LPAREN)
        value = self.string()
        self._match(TokenType.RPAREN)
        return DV(sort, value)

    def _assoc(self, token_type: TokenType, cls: type[AS]) -> AS:
        """Parse ``\\sym{}(App)`` and rebuild ``cls`` from the inner application."""
        self._match(token_type)
        self._match(TokenType.LBRACE)
        self._match(TokenType.RBRACE)
        self._match(TokenType.LPAREN)
        app = self.app()
        self._match(TokenType.RPAREN)
        return cls(app.symbol, app.sorts, app.args)  # type: ignore

    def left_assoc(self) -> LeftAssoc:
        r"""Parse ``\left-assoc{}(App)``."""
        return self._assoc(TokenType.ML_LEFT_ASSOC, LeftAssoc)

    def right_assoc(self) -> RightAssoc:
        r"""Parse ``\right-assoc{}(App)``."""
        return self._assoc(TokenType.ML_RIGHT_ASSOC, RightAssoc)

    def _attr_list(self) -> list[App]:
        return self._delimited_list_of(self.app, TokenType.LBRACK, TokenType.RBRACK)

    def sentence(self) -> Sentence:
        """Parse a module sentence, dispatching via ``_SENTENCE_KWS``."""
        token_type = self._la.type

        if token_type not in self._SENTENCE_KWS:
            raise ValueError(f'Expected {[kw.name for kw in self._SENTENCE_KWS]}, found: {token_type.name}')

        parse = getattr(self, self._SENTENCE_KWS[token_type])
        return parse()

    def importt(self) -> Import:
        """Parse an ``import`` sentence."""
        self._match(TokenType.KW_IMPORT)
        module_name = self.id()
        attrs = self._attr_list()
        return Import(module_name, attrs)

    def sort_decl(self) -> SortDecl:
        """Parse a ``sort`` declaration."""
        self._match(TokenType.KW_SORT)
        name = self.id()
        vars = self._sort_var_list()
        attrs = self._attr_list()
        return SortDecl(name, vars, attrs, hooked=False)

    def hooked_sort_decl(self) -> SortDecl:
        """Parse a ``hooked-sort`` declaration."""
        self._match(TokenType.KW_HOOKED_SORT)
        name = self.id()
        vars = self._sort_var_list()
        attrs = self._attr_list()
        return SortDecl(name, vars, attrs, hooked=True)

    def _sort_var_list(self) -> list[SortVar]:
        return self._delimited_list_of(self.sort_var, TokenType.LBRACE, TokenType.RBRACE)

    def symbol_decl(self) -> SymbolDecl:
        """Parse a ``symbol`` declaration."""
        self._match(TokenType.KW_SYMBOL)
        symbol = self.symbol()
        sort_params = self._sort_param_list()
        self._match(TokenType.COLON)
        sort = self.sort()
        attrs = self._attr_list()
        return SymbolDecl(symbol, sort_params, sort, attrs, hooked=False)

    def hooked_symbol_decl(self) -> SymbolDecl:
        """Parse a ``hooked-symbol`` declaration."""
        self._match(TokenType.KW_HOOKED_SYMBOL)
        symbol = self.symbol()
        sort_params = self._sort_param_list()
        self._match(TokenType.COLON)
        sort = self.sort()
        attrs = self._attr_list()
        return SymbolDecl(symbol, sort_params, sort, attrs, hooked=True)

    def alias_decl(self) -> AliasDecl:
        """Parse an ``alias ... where lhs := rhs`` declaration."""
        self._match(TokenType.KW_ALIAS)
        symbol = self.symbol()
        sort_params = self._sort_param_list()
        self._match(TokenType.COLON)
        sort = self.sort()
        self._match(TokenType.KW_WHERE)
        left = self.app()
        self._match(TokenType.WALRUS)
        right = self.pattern()
        attrs = self._attr_list()
        return AliasDecl(symbol, sort_params, sort, left, right, attrs)

    def _sort_param_list(self) -> list[Sort]:
        return self._delimited_list_of(self.sort, TokenType.LPAREN, TokenType.RPAREN)

    # TODO remove once \left-assoc{}(\or{...}(...)) is no longer supported
    def multi_or(self) -> list[Pattern]:
        r"""Parse the legacy ``\left-assoc{}(\or{Sort}(...))`` encoding of a
        multi-operand disjunction, returning only the operand patterns."""
        self._match(TokenType.ML_LEFT_ASSOC)
        self._match(TokenType.LBRACE)
        self._match(TokenType.RBRACE)
        self._match(TokenType.LPAREN)
        self._match(TokenType.ML_OR)
        self._match(TokenType.LBRACE)
        self.sort()
        self._match(TokenType.RBRACE)
        patterns = self._pattern_list()
        self._match(TokenType.RPAREN)
        return patterns

    def symbol(self) -> Symbol:
        """Parse a symbol head ``sym{SortVar, ...}``."""
        name = self.symbol_id()
        vars = self._sort_var_list()
        return Symbol(name, vars)

    def axiom(self) -> Axiom:
        """Parse an ``axiom`` sentence."""
        self._match(TokenType.KW_AXIOM)
        vars = self._sort_var_list()
        pattern = self.pattern()
        attrs = self._attr_list()
        return Axiom(vars, pattern, attrs)

    def claim(self) -> Claim:
        """Parse a ``claim`` sentence."""
        self._match(TokenType.KW_CLAIM)
        vars = self._sort_var_list()
        pattern = self.pattern()
        attrs = self._attr_list()
        return Claim(vars, pattern, attrs)

    def module(self) -> Module:
        """Parse ``module NAME <sentences> endmodule [attrs]``."""
        self._match(TokenType.KW_MODULE)
        name = self.id()

        sentences: list[Sentence] = []
        while self._la.type != TokenType.KW_ENDMODULE:
            sentences.append(self.sentence())
        self._consume()  # the ENDMODULE keyword itself

        attrs = self._attr_list()

        return Module(name, sentences, attrs)

    def definition(self) -> Definition:
        """Parse a whole definition: top-level attributes then modules to EOF."""
        attrs = self._attr_list()

        modules: list[Module] = []
        while self._la.type != TokenType.EOF:
            modules.append(self.module())

        return Definition(modules, attrs)