1from __future__ import annotations
2
3from typing import TYPE_CHECKING
4
5if TYPE_CHECKING:
6 from collections.abc import Iterable, Iterator
7 from typing import Final
8
9
[docs]
def enquote_string(s: str) -> str:
    """Return *s* with special characters replaced by backslash escapes.

    The characters of *s* are passed through ``enquoted`` with Unicode
    escapes allowed and the resulting pieces are concatenated.
    """
    escaped_parts = enquoted(s)
    return ''.join(escaped_parts)
12
13
[docs]
def dequote_string(s: str) -> str:
    """Return *s* with its backslash escape sequences decoded.

    The characters of *s* are passed through ``dequoted`` with Unicode
    escapes allowed and the resulting pieces are concatenated.

    Raises:
        ValueError: If ``dequoted`` rejects an escape sequence in *s*.
    """
    decoded_parts = dequoted(s)
    return ''.join(decoded_parts)
16
17
[docs]
def enquote_bytes(s: str) -> str:
    """Return *s* escaped using only ``\\x`` and single-character escapes.

    The characters of *s* are passed through ``enquoted`` with
    ``allow_unicode=False`` and the resulting pieces are concatenated.

    Raises:
        ValueError: If *s* contains a character with a code point above 0xFF.
    """
    escaped_parts = enquoted(s, allow_unicode=False)
    return ''.join(escaped_parts)
20
21
[docs]
def dequote_bytes(s: str) -> str:
    """Return *s* with its escapes decoded, rejecting ``\\u`` and ``\\U``.

    The characters of *s* are passed through ``dequoted`` with
    ``allow_unicode=False`` and the resulting pieces are concatenated.

    Raises:
        ValueError: If ``dequoted`` rejects an escape sequence in *s*,
            including any ``\\u`` or ``\\U`` escape.
    """
    decoded_parts = dequoted(s, allow_unicode=False)
    return ''.join(decoded_parts)
24
25
[docs]
def bytes_encode(s: str) -> bytes:
    """Encode *s* as Latin-1, mapping each code point 0-255 to one byte.

    Raises:
        UnicodeEncodeError: If *s* contains a character above U+00FF.
    """
    encoded = s.encode(encoding='latin-1')
    return encoded
28
29
[docs]
def bytes_decode(b: bytes) -> str:
    """Decode *b* as Latin-1, mapping each byte to the same code point."""
    decoded = b.decode(encoding='latin-1')
    return decoded
32
33
# States of the ``dequoted`` state machine.
NORMAL = 1  # copying ordinary characters
ESCAPE = 2  # a backslash was just consumed
CPOINT = 3  # reading the hex digits of a \x, \u or \U escape

# Character following a backslash -> the character that escape denotes.
ESCAPE_TABLE: Final = {
    '"': '"',
    '\\': '\\',
    'n': '\n',
    't': '\t',
    'r': '\r',
    'f': '\f',
}

# Code point escape introducer -> number of hex digits that must follow it.
CPOINT_TABLE: Final = {
    'x': 2,
    'u': 4,
    'U': 8,
}

# Hex digit character (either case) -> its numeric value.
HEX_TABLE: Final = {c: int(c, 16) for c in '0123456789abcdefABCDEF'}

# Largest value accepted by chr(); an 8-digit \U escape can encode more.
_MAX_CODE_POINT: Final = 0x10FFFF


def dequoted(it: Iterable[str], *, allow_unicode: bool = True) -> Iterator[str]:
    """Lazily decode backslash escape sequences in a stream of characters.

    Ordinary characters are yielded unchanged. A backslash starts an escape:
    either one of the single-character escapes in ``ESCAPE_TABLE`` or a code
    point escape (``\\xHH``, ``\\uHHHH``, ``\\UHHHHHHHH``) whose hex digits
    are accumulated into a single character.

    Args:
        it: Iterable yielding one character at a time (for example a ``str``).
        allow_unicode: When false, only ``\\x`` is accepted as a code point
            escape; ``\\u`` and ``\\U`` raise ``ValueError``.

    Yields:
        The decoded characters, one per input character or escape sequence.

    Raises:
        ValueError: On an unknown escape, a disallowed Unicode escape, a
            non-hex digit inside a code point escape, a code point above
            0x10FFFF, or input that ends in the middle of an escape. Because
            this is a generator, the error surfaces during iteration.
    """
    acc = 0  # value of the code point decoded so far
    cnt = 0  # hex digits still expected for the current code point escape
    state = NORMAL
    for c in it:
        if state == CPOINT:
            if c not in HEX_TABLE:
                raise ValueError(f'Expected hex digit, got: {c}')

            acc = acc * 16 + HEX_TABLE[c]
            cnt -= 1
            if cnt == 0:
                # Check the range here: chr() would otherwise fail with an
                # opaque message, or with OverflowError for very large values.
                if acc > _MAX_CODE_POINT:
                    raise ValueError(f'Code point out of range: 0x{acc:x}')
                yield chr(acc)
                acc = 0
                state = NORMAL

        elif state == ESCAPE:
            if c in CPOINT_TABLE:
                if not allow_unicode and c != 'x':
                    raise ValueError(fr'Unicode escape sequence not allowed: \{c}')
                cnt = CPOINT_TABLE[c]
                state = CPOINT
            elif c in ESCAPE_TABLE:
                yield ESCAPE_TABLE[c]
                state = NORMAL
            else:
                raise ValueError(fr'Unexpected escape sequence: \{c}')

        elif c == '\\':
            state = ESCAPE

        else:
            yield c

    # Input ended while an escape sequence was still open.
    if state == CPOINT:
        raise ValueError('Incomplete Unicode code point')
    elif state == ESCAPE:
        raise ValueError('Incomplete escape sequence')
95
96
# Code point -> short backslash escape emitted in place of that character.
ENQUOTE_TABLE: Final = {
    ord(char): escape
    for char, escape in (
        ('\t', r'\t'),  # 9
        ('\n', r'\n'),  # 10
        ('\f', r'\f'),  # 12
        ('\r', r'\r'),  # 13
        ('"', r'\"'),  # 34
        ('\\', r'\\'),  # 92
    )
}


def enquoted(it: Iterable[str], *, allow_unicode: bool = True) -> Iterator[str]:
    """Lazily escape a stream of characters using backslash sequences.

    Characters listed in ``ENQUOTE_TABLE`` become their short escape,
    printable ASCII (32-126) is passed through, other code points up to 0xFF
    become ``\\xHH``, and larger ones become ``\\uHHHH`` or ``\\UHHHHHHHH``.

    Args:
        it: Iterable yielding one character at a time (for example a ``str``).
        allow_unicode: When false, any character above 0xFF raises
            ``ValueError`` instead of being written as ``\\u``/``\\U``.

    Yields:
        One output fragment per input character.

    Raises:
        ValueError: If a character above 0xFF is met while *allow_unicode*
            is false. Because this is a generator, the error surfaces during
            iteration.
    """
    for char in it:
        codepoint = ord(char)

        short_escape = ENQUOTE_TABLE.get(codepoint)
        if short_escape is not None:
            yield short_escape
            continue

        if 32 <= codepoint < 127:
            # Printable ASCII needs no escaping.
            yield char
            continue

        if codepoint <= 0xFF:
            yield fr'\x{codepoint:02x}'
            continue

        if not allow_unicode:
            raise ValueError(f"Unicode character not allowed: '{char}' ({codepoint})")

        if codepoint <= 0xFFFF:
            yield fr'\u{codepoint:04x}'
        elif codepoint <= 0xFFFFFFFF:
            yield fr'\U{codepoint:08x}'
        else:
            # Defensive guard: ord() of a single character tops out at
            # 0x10FFFF, so this is not expected to trigger.
            raise ValueError(f"Unsupported character: '{char}' ({codepoint})")