Skip to content

Commit 6f5d7f6

Browse files
committed
Vendor in pip 23.0
1 parent 150c633 commit 6f5d7f6

File tree

18 files changed

+1851
-0
lines changed

18 files changed

+1851
-0
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from typing import TYPE_CHECKING, Tuple
2+
3+
if TYPE_CHECKING:
    # TypedDict was introduced in Python 3.8.
    #
    # TODO: Remove the else block and TYPE_CHECKING check when dropping support
    # for Python 3.7.
    from typing import TypedDict

    class CodingStateMachineDict(TypedDict, total=False):
        """Static-typing view of a coding state machine description.

        Only seen by type checkers.  ``total=False`` makes every key optional
        from the checker's point of view; ``language`` is the one the
        original author explicitly flags as optional.
        """

        class_table: Tuple[int, ...]
        class_factor: int
        state_table: Tuple[int, ...]
        char_len_table: Tuple[int, ...]
        name: str
        language: str  # Optional key

else:
    # At runtime the name is a plain alias for ``dict``, so the module still
    # imports on interpreters whose ``typing`` has no ``TypedDict``.
    CodingStateMachineDict = dict
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
######################## BEGIN LICENSE BLOCK ########################
2+
# This code was modified from latin1prober.py by Rob Speer <rob@lumino.so>.
3+
# The Original Code is Mozilla Universal charset detector code.
4+
#
5+
# The Initial Developer of the Original Code is
6+
# Netscape Communications Corporation.
7+
# Portions created by the Initial Developer are Copyright (C) 2001
8+
# the Initial Developer. All Rights Reserved.
9+
#
10+
# Contributor(s):
11+
# Rob Speer - adapt to MacRoman encoding
12+
# Mark Pilgrim - port to Python
13+
# Shy Shalom - original C code
14+
#
15+
# This library is free software; you can redistribute it and/or
16+
# modify it under the terms of the GNU Lesser General Public
17+
# License as published by the Free Software Foundation; either
18+
# version 2.1 of the License, or (at your option) any later version.
19+
#
20+
# This library is distributed in the hope that it will be useful,
21+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
22+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23+
# Lesser General Public License for more details.
24+
#
25+
# You should have received a copy of the GNU Lesser General Public
26+
# License along with this library; if not, write to the Free Software
27+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
28+
# 02110-1301 USA
29+
######################### END LICENSE BLOCK #########################
30+
31+
from typing import List, Union
32+
33+
from .charsetprober import CharSetProber
34+
from .enums import ProbingState
35+
36+
# Number of frequency categories (0-3) that MacRomanClassModel can yield;
# the prober keeps one counter per category.
FREQ_CAT_NUM = 4

# Character classes assigned to each byte value by MacRoman_CharToClass.
UDF = 0  # undefined
OTH = 1  # other
ASC = 2  # ascii capital letter
ASS = 3  # ascii small letter
ACV = 4  # accent capital vowel
ACO = 5  # accent capital other
ASV = 6  # accent small vowel
ASO = 7  # accent small other
ODD = 8  # character that is unlikely to appear
CLASS_NUM = 9  # total classes

# The change from Latin1 is that we explicitly look for extended characters
# that are infrequently-occurring symbols, and consider them to always be
# improbable. This should let MacRoman get out of the way of more likely
# encodings in most situations.

# Byte value (0x00-0xFF) -> character class; exactly 256 entries.
# fmt: off
MacRoman_CharToClass = (
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 00 - 07
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 08 - 0F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 10 - 17
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 18 - 1F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 20 - 27
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 28 - 2F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 30 - 37
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 38 - 3F
    OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC,  # 40 - 47
    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,  # 48 - 4F
    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,  # 50 - 57
    ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH,  # 58 - 5F
    OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS,  # 60 - 67
    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,  # 68 - 6F
    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,  # 70 - 77
    ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH,  # 78 - 7F
    ACV, ACV, ACO, ACV, ACO, ACV, ACV, ASV,  # 80 - 87
    ASV, ASV, ASV, ASV, ASV, ASO, ASV, ASV,  # 88 - 8F
    ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASV,  # 90 - 97
    ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV,  # 98 - 9F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, ASO,  # A0 - A7
    OTH, OTH, ODD, ODD, OTH, OTH, ACV, ACV,  # A8 - AF
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # B0 - B7
    OTH, OTH, OTH, OTH, OTH, OTH, ASV, ASV,  # B8 - BF
    OTH, OTH, ODD, OTH, ODD, OTH, OTH, OTH,  # C0 - C7
    OTH, OTH, OTH, ACV, ACV, ACV, ACV, ASV,  # C8 - CF
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, ODD,  # D0 - D7
    ASV, ACV, ODD, OTH, OTH, OTH, OTH, OTH,  # D8 - DF
    OTH, OTH, OTH, OTH, OTH, ACV, ACV, ACV,  # E0 - E7
    ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV,  # E8 - EF
    ODD, ACV, ACV, ACV, ACV, ASV, ODD, ODD,  # F0 - F7
    ODD, ODD, ODD, ODD, ODD, ODD, ODD, ODD,  # F8 - FF
)

# Flattened CLASS_NUM x CLASS_NUM matrix, indexed as
# ``previous_class * CLASS_NUM + current_class`` (row label on the right is
# the previous class, column header is the current class).
# 0 : illegal
# 1 : very unlikely
# 2 : normal
# 3 : very likely
MacRomanClassModel = (
# UDF OTH ASC ASS ACV ACO ASV ASO ODD
    0,  0,  0,  0,  0,  0,  0,  0,  0,  # UDF
    0,  3,  3,  3,  3,  3,  3,  3,  1,  # OTH
    0,  3,  3,  3,  3,  3,  3,  3,  1,  # ASC
    0,  3,  3,  3,  1,  1,  3,  3,  1,  # ASS
    0,  3,  3,  3,  1,  2,  1,  2,  1,  # ACV
    0,  3,  3,  3,  3,  3,  3,  3,  1,  # ACO
    0,  3,  1,  3,  1,  1,  1,  3,  1,  # ASV
    0,  3,  1,  3,  1,  1,  3,  3,  1,  # ASO
    0,  1,  1,  1,  1,  1,  1,  1,  1,  # ODD
)
# fmt: on
107+
108+
109+
class MacRomanProber(CharSetProber):
    """Prober that rates how plausible it is that input is MacRoman text.

    Every byte is mapped to a character class via ``MacRoman_CharToClass``;
    the (previous class, current class) pair is then rated through
    ``MacRomanClassModel``.  The ratings are tallied in ``_freq_counter``
    (one slot per rating 0-3) and converted to a confidence value by
    :meth:`get_confidence`.
    """

    def __init__(self) -> None:
        super().__init__()
        self._last_char_class = OTH
        self._freq_counter: List[int] = []
        self.reset()

    def reset(self) -> None:
        """Restore the initial class/counter state, then reset the base class."""
        self._last_char_class = OTH
        self._freq_counter = [0] * FREQ_CAT_NUM

        # express the prior that MacRoman is a somewhat rare encoding;
        # this can be done by starting out in a slightly improbable state
        # that must be overcome
        self._freq_counter[2] = 10

        super().reset()

    @property
    def charset_name(self) -> str:
        """Name of the charset this prober detects."""
        return "MacRoman"

    @property
    def language(self) -> str:
        """Always the empty string for this prober."""
        return ""

    def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
        """Consume *byte_str* and update the class-transition tallies.

        The input is first passed through ``self.remove_xml_tags`` (not
        defined in this class; presumably inherited from ``CharSetProber``).
        Processing stops, and the state becomes ``ProbingState.NOT_ME``, as
        soon as a transition rated 0 (illegal) is seen.

        Returns:
            ``self.state`` after the bytes have been processed.
        """
        byte_str = self.remove_xml_tags(byte_str)
        for c in byte_str:
            char_class = MacRoman_CharToClass[c]
            # Row = previous class, column = current class.
            freq = MacRomanClassModel[(self._last_char_class * CLASS_NUM) + char_class]
            if freq == 0:
                self._state = ProbingState.NOT_ME
                break
            self._freq_counter[freq] += 1
            self._last_char_class = char_class

        return self.state

    def get_confidence(self) -> float:
        """Return the current confidence that the input is MacRoman.

        Yields ``0.01`` once the prober is in the ``NOT_ME`` state.
        Otherwise the score is ``(very_likely - 20 * very_unlikely) / total``,
        clamped at zero and scaled by ``0.73``.
        """
        if self.state == ProbingState.NOT_ME:
            return 0.01

        total = sum(self._freq_counter)
        confidence = (
            0.0
            if total < 0.01
            else (self._freq_counter[3] - self._freq_counter[1] * 20.0) / total
        )
        confidence = max(confidence, 0.0)
        # lower the confidence of MacRoman so that other more accurate
        # detector can take priority.
        confidence *= 0.73
        return confidence
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from typing import TYPE_CHECKING, Optional
2+
3+
if TYPE_CHECKING:
    # TypedDict was introduced in Python 3.8.
    #
    # TODO: Remove the else block and TYPE_CHECKING check when dropping support
    # for Python 3.7.
    from typing import TypedDict

    class ResultDict(TypedDict):
        """Static-typing view of a result mapping.

        Only seen by type checkers; all three keys are required and
        ``encoding`` / ``language`` may be ``None``.
        """

        encoding: Optional[str]
        confidence: float
        language: Optional[str]

else:
    # At runtime the name is a plain alias for ``dict``, so the module still
    # imports on interpreters whose ``typing`` has no ``TypedDict``.
    ResultDict = dict
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
2+
import sys
3+
from unittest import TestCase, main
4+
5+
from ..ansi import Back, Fore, Style
6+
from ..ansitowin32 import AnsiToWin32
7+
8+
# Streams captured at import time so tearDown can put them back.
stdout_orig = sys.stdout
stderr_orig = sys.stderr


class AnsiTest(TestCase):
    """Checks the escape sequences exposed by ``Fore``, ``Back`` and ``Style``."""

    def setUp(self):
        # sanity check: stdout should be a file or StringIO object.
        # It will only be AnsiToWin32 if init() has previously wrapped it
        self.assertNotEqual(type(sys.stdout), AnsiToWin32)
        self.assertNotEqual(type(sys.stderr), AnsiToWin32)

    def tearDown(self):
        # Restore the streams saved at module import.
        sys.stdout = stdout_orig
        sys.stderr = stderr_orig


    def testForeAttributes(self):
        self.assertEqual(Fore.BLACK, '\033[30m')
        self.assertEqual(Fore.RED, '\033[31m')
        self.assertEqual(Fore.GREEN, '\033[32m')
        self.assertEqual(Fore.YELLOW, '\033[33m')
        self.assertEqual(Fore.BLUE, '\033[34m')
        self.assertEqual(Fore.MAGENTA, '\033[35m')
        self.assertEqual(Fore.CYAN, '\033[36m')
        self.assertEqual(Fore.WHITE, '\033[37m')
        self.assertEqual(Fore.RESET, '\033[39m')

        # Check the light, extended versions.
        self.assertEqual(Fore.LIGHTBLACK_EX, '\033[90m')
        self.assertEqual(Fore.LIGHTRED_EX, '\033[91m')
        self.assertEqual(Fore.LIGHTGREEN_EX, '\033[92m')
        self.assertEqual(Fore.LIGHTYELLOW_EX, '\033[93m')
        self.assertEqual(Fore.LIGHTBLUE_EX, '\033[94m')
        self.assertEqual(Fore.LIGHTMAGENTA_EX, '\033[95m')
        self.assertEqual(Fore.LIGHTCYAN_EX, '\033[96m')
        self.assertEqual(Fore.LIGHTWHITE_EX, '\033[97m')


    def testBackAttributes(self):
        self.assertEqual(Back.BLACK, '\033[40m')
        self.assertEqual(Back.RED, '\033[41m')
        self.assertEqual(Back.GREEN, '\033[42m')
        self.assertEqual(Back.YELLOW, '\033[43m')
        self.assertEqual(Back.BLUE, '\033[44m')
        self.assertEqual(Back.MAGENTA, '\033[45m')
        self.assertEqual(Back.CYAN, '\033[46m')
        self.assertEqual(Back.WHITE, '\033[47m')
        self.assertEqual(Back.RESET, '\033[49m')

        # Check the light, extended versions.
        self.assertEqual(Back.LIGHTBLACK_EX, '\033[100m')
        self.assertEqual(Back.LIGHTRED_EX, '\033[101m')
        self.assertEqual(Back.LIGHTGREEN_EX, '\033[102m')
        self.assertEqual(Back.LIGHTYELLOW_EX, '\033[103m')
        self.assertEqual(Back.LIGHTBLUE_EX, '\033[104m')
        self.assertEqual(Back.LIGHTMAGENTA_EX, '\033[105m')
        self.assertEqual(Back.LIGHTCYAN_EX, '\033[106m')
        self.assertEqual(Back.LIGHTWHITE_EX, '\033[107m')


    def testStyleAttributes(self):
        self.assertEqual(Style.DIM, '\033[2m')
        self.assertEqual(Style.NORMAL, '\033[22m')
        self.assertEqual(Style.BRIGHT, '\033[1m')


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)