Source code for pyk.dequote

  1from __future__ import annotations
  2
  3from typing import TYPE_CHECKING
  4
  5if TYPE_CHECKING:
  6    from collections.abc import Iterable, Iterator
  7    from typing import Final
  8
  9

[docs]
 10def enquote_string(s: str) -> str:
 11    return ''.join(enquoted(s))

 12
 13

[docs]
 14def dequote_string(s: str) -> str:
 15    return ''.join(dequoted(s))

 16
 17

[docs]
 18def enquote_bytes(s: str) -> str:
 19    return ''.join(enquoted(s, allow_unicode=False))

 20
 21

[docs]
 22def dequote_bytes(s: str) -> str:
 23    return ''.join(dequoted(s, allow_unicode=False))

 24
 25

[docs]
 26def bytes_encode(s: str) -> bytes:
 27    return s.encode('latin-1')

 28
 29

[docs]
 30def bytes_decode(b: bytes) -> str:
 31    return b.decode('latin-1')

 32
 33
 34NORMAL = 1
 35ESCAPE = 2
 36CPOINT = 3
 37
 38ESCAPE_TABLE: Final = {
 39    '"': '"',
 40    '\\': '\\',
 41    'n': '\n',
 42    't': '\t',
 43    'r': '\r',
 44    'f': '\f',
 45}
 46
 47CPOINT_TABLE: Final = {
 48    'x': 2,
 49    'u': 4,
 50    'U': 8,
 51}
 52
 53HEX_TABLE = {c: int(c, 16) for c in '0123456789abcdefABCDEF'}
 54
 55

[docs]
 56def dequoted(it: Iterable[str], *, allow_unicode: bool = True) -> Iterator[str]:
 57    acc = 0
 58    cnt = 0
 59    state = NORMAL
 60    for c in it:
 61        if state == CPOINT:
 62            if c not in HEX_TABLE:
 63                raise ValueError(f'Expected hex digit, got: {c}')
 64
 65            acc *= 16
 66            acc += HEX_TABLE[c]
 67            cnt -= 1
 68            if cnt == 0:
 69                yield chr(acc)
 70                acc = 0
 71                state = NORMAL
 72
 73        elif state == ESCAPE:
 74            if c in CPOINT_TABLE:
 75                if not allow_unicode and c != 'x':
 76                    raise ValueError(fr'Unicode escape sequence not allowed: \{c}')
 77                cnt = CPOINT_TABLE[c]
 78                state = CPOINT
 79            elif c in ESCAPE_TABLE:
 80                yield ESCAPE_TABLE[c]
 81                state = NORMAL
 82            else:
 83                raise ValueError(fr'Unexpected escape sequence: \{c}')
 84
 85        elif c == '\\':
 86            state = ESCAPE
 87
 88        else:
 89            yield c
 90
 91    if state == CPOINT:
 92        raise ValueError('Incomplete Unicode code point')
 93    elif state == ESCAPE:
 94        raise ValueError('Incomplete escape sequence')

 95
 96
 97ENQUOTE_TABLE: Final = {
 98    ord('\t'): r'\t',  # 9
 99    ord('\n'): r'\n',  # 10
100    ord('\f'): r'\f',  # 12
101    ord('\r'): r'\r',  # 13
102    ord('"'): r'\"',  # 34
103    ord('\\'): r'\\',  # 92
104}
105
106

[docs]
def enquoted(it: Iterable[str], *, allow_unicode: bool = True) -> Iterator[str]:
    r"""Lazily escape a stream of characters.

    For each character, in order of precedence:

    * characters listed in ``ENQUOTE_TABLE`` yield their short escape;
    * code points 32..126 are yielded unchanged;
    * remaining code points up to 0xFF yield ``\xHH``;
    * larger code points yield ``\uHHHH`` (up to 0xFFFF) or ``\UHHHHHHHH``.

    Args:
        it: Iterable of single characters to escape (e.g. a ``str``).
        allow_unicode: If false, any character above 0xFF raises
            ``ValueError`` instead of being escaped.

    Yields:
        One string fragment per input character.

    Raises:
        ValueError: If a character above 0xFF is seen while
            ``allow_unicode`` is false, or if a code point exceeds
            0xFFFFFFFF.
    """
    for ch in it:
        cp = ord(ch)

        short_escape = ENQUOTE_TABLE.get(cp)
        if short_escape is not None:
            yield short_escape
            continue

        if 32 <= cp < 127:
            # Code points 32..126 need no escaping.
            yield ch
            continue

        if cp <= 0xFF:
            yield fr'\x{cp:02x}'
            continue

        # From here on the character needs a \u or \U escape.
        if not allow_unicode:
            raise ValueError(f"Unicode character not allowed: '{ch}' ({cp})")

        if cp <= 0xFFFF:
            yield fr'\u{cp:04x}'
        elif cp <= 0xFFFFFFFF:
            yield fr'\U{cp:08x}'
        else:
            raise ValueError(f"Unsupported character: '{ch}' ({cp})")