Source code for pyk.dequote

  1from __future__ import annotations
  2
  3from typing import TYPE_CHECKING
  4
  5if TYPE_CHECKING:
  6    from collections.abc import Iterable, Iterator
  7    from typing import Final
  8
  9
def enquote_string(s: str) -> str:
    """Return ``s`` with its characters escaped by :func:`enquoted`.

    The pieces yielded by ``enquoted(s)`` (default ``allow_unicode=True``)
    are concatenated into a single string.
    """
    return ''.join(enquoted(s))
12 13
def dequote_string(s: str) -> str:
    """Return ``s`` with its escape sequences resolved by :func:`dequoted`.

    The characters yielded by ``dequoted(s)`` (default ``allow_unicode=True``)
    are concatenated into a single string.

    Raises:
        ValueError: Propagated from :func:`dequoted` on a malformed escape.
    """
    return ''.join(dequoted(s))
16 17
def enquote_bytes(s: str) -> str:
    """Return ``s`` escaped by :func:`enquoted` with ``allow_unicode=False``.

    In this mode :func:`enquoted` rejects any character whose code point is
    above ``0xFF`` instead of emitting a Unicode escape for it.

    Raises:
        ValueError: Propagated from :func:`enquoted` for such a character.
    """
    return ''.join(enquoted(s, allow_unicode=False))
20 21
def dequote_bytes(s: str) -> str:
    """Return ``s`` unescaped by :func:`dequoted` with ``allow_unicode=False``.

    In this mode :func:`dequoted` accepts the two-digit hex escape but rejects
    the four- and eight-digit Unicode escapes.

    Raises:
        ValueError: Propagated from :func:`dequoted` on a malformed or
            disallowed escape.
    """
    return ''.join(dequoted(s, allow_unicode=False))
24 25
def bytes_encode(s: str) -> bytes:
    """Encode ``s`` to ``bytes`` with the Latin-1 codec.

    Latin-1 maps code points 0-255 one-to-one onto byte values.

    Raises:
        UnicodeEncodeError: If ``s`` contains a character above U+00FF.
    """
    return s.encode('latin-1')
28 29
def bytes_decode(b: bytes) -> str:
    """Decode ``b`` to ``str`` with the Latin-1 codec.

    Every byte value maps to the code point of the same number, so the
    result has one character per input byte.
    """
    return b.decode('latin-1')
# States of the ``dequoted`` scanner.
NORMAL = 1  # outside any escape sequence
ESCAPE = 2  # a backslash was just consumed
CPOINT = 3  # reading the hex digits of a code point escape

# Character following a backslash -> the character it stands for.
ESCAPE_TABLE: Final = {
    '"': '"',
    '\\': '\\',
    'n': '\n',
    't': '\t',
    'r': '\r',
    'f': '\f',
}

# Character following a backslash -> number of hex digits that must follow.
CPOINT_TABLE: Final = {
    'x': 2,
    'u': 4,
    'U': 8,
}

# Hex digit (either case) -> its numeric value.
HEX_TABLE: Final = {c: int(c, 16) for c in '0123456789abcdefABCDEF'}


def dequoted(it: Iterable[str], *, allow_unicode: bool = True) -> Iterator[str]:
    r"""Lazily resolve backslash escape sequences in a stream of characters.

    Characters outside an escape sequence are yielded unchanged. After a
    backslash, the next character is either a key of ``CPOINT_TABLE``
    (``x``, ``u``, ``U`` followed by 2, 4 or 8 hex digits, yielding the
    character with that code point) or a key of ``ESCAPE_TABLE`` (yielding
    the mapped character).

    Args:
        it: Characters to scan, one per item.
        allow_unicode: When false, the ``\u`` and ``\U`` escapes are rejected;
            ``\x`` is still accepted. Characters outside escape sequences are
            not checked.

    Yields:
        The unescaped characters, in order.

    Raises:
        ValueError: On a non-hex digit inside a code point escape, an unknown
            or disallowed escape, a code point above U+10FFFF, or input that
            ends in the middle of an escape sequence.
    """
    acc = 0  # code point value accumulated so far
    cnt = 0  # hex digits still expected
    state = NORMAL
    for c in it:
        if state == CPOINT:
            if c not in HEX_TABLE:
                raise ValueError(f'Expected hex digit, got: {c}')

            acc *= 16
            acc += HEX_TABLE[c]
            cnt -= 1
            if cnt == 0:
                # Eight hex digits can encode values chr() does not accept;
                # reject them here so the failure is always a ValueError with
                # a message that names the offending value.
                if acc > 0x10FFFF:
                    raise ValueError(f'Invalid Unicode code point: {acc:#x}')
                yield chr(acc)
                acc = 0
                state = NORMAL

        elif state == ESCAPE:
            if c in CPOINT_TABLE:
                if not allow_unicode and c != 'x':
                    raise ValueError(fr'Unicode escape sequence not allowed: \{c}')
                cnt = CPOINT_TABLE[c]
                state = CPOINT
            elif c in ESCAPE_TABLE:
                yield ESCAPE_TABLE[c]
                state = NORMAL
            else:
                raise ValueError(fr'Unexpected escape sequence: \{c}')

        elif c == '\\':
            state = ESCAPE

        else:
            yield c

    # Input ended while an escape sequence was still open.
    if state == CPOINT:
        raise ValueError('Incomplete Unicode code point')
    elif state == ESCAPE:
        raise ValueError('Incomplete escape sequence')
# Code point -> two-character escape emitted in its place by ``enquoted``.
ENQUOTE_TABLE: Final = {
    ord('\t'): r'\t',  # 9
    ord('\n'): r'\n',  # 10
    ord('\f'): r'\f',  # 12
    ord('\r'): r'\r',  # 13
    ord('"'): r'\"',  # 34
    ord('\\'): r'\\',  # 92
}


def enquoted(it: Iterable[str], *, allow_unicode: bool = True) -> Iterator[str]:
    r"""Lazily escape a stream of characters.

    For each character, in order of precedence:

    * a code point in ``ENQUOTE_TABLE`` yields its two-character escape;
    * a code point in ``32..126`` yields the character itself;
    * any other code point up to ``0xFF`` yields ``\xhh``;
    * a code point up to ``0xFFFF`` yields ``\uhhhh``;
    * a larger code point yields ``\Uhhhhhhhh``.

    Hex digits are lower case and zero-padded to the fixed width.

    Args:
        it: Characters to escape, one per item.
        allow_unicode: When false, any code point above ``0xFF`` is rejected
            instead of being written as a ``\u`` or ``\U`` escape.

    Yields:
        One output piece per input character.

    Raises:
        ValueError: If ``allow_unicode`` is false and a code point above
            ``0xFF`` is encountered.
    """
    for c in it:
        code = ord(c)
        if code in ENQUOTE_TABLE:
            yield ENQUOTE_TABLE[code]
        elif 32 <= code < 127:
            yield c
        elif code <= 0xFF:
            yield fr'\x{code:02x}'
        elif not allow_unicode:
            raise ValueError(f"Unicode character not allowed: '{c}' ({code})")
        elif code <= 0xFFFF:
            yield fr'\u{code:04x}'
        elif code <= 0xFFFFFFFF:
            yield fr'\U{code:08x}'
        else:
            # Defensive: ord() of a str character is at most 0x10FFFF, so this
            # branch is not expected to be reached.
            raise ValueError(f"Unsupported character: '{c}' ({code})")