Skip to content

Commit 93fada3

Browse files
committed
feat(vendor): Update Vendored deps
1 parent 829f020 commit 93fada3

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

67 files changed

+15558
-13211
lines changed

vendor/bin/normalizer

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
#!/usr/local/opt/python@3.11/bin/python3.11
1+
#!/home/geekmasher/.local/share/virtualenvs/policy-as-code-ys9TzZIz/bin/python
22
# -*- coding: utf-8 -*-
33
import re
44
import sys
5-
from charset_normalizer.cli import cli_detect
5+
from charset_normalizer import cli
66
if __name__ == '__main__':
77
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
8-
sys.exit(cli_detect())
8+
sys.exit(cli.cli_detect())

vendor/certifi/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
from .core import contents, where
22

33
__all__ = ["contents", "where"]
4-
__version__ = "2024.07.04"
4+
__version__ = "2025.04.26"

vendor/certifi/cacert.pem

Lines changed: 253 additions & 375 deletions
Large diffs are not rendered by default.

vendor/charset_normalizer/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
# -*- coding: utf-8 -*-
21
"""
32
Charset-Normalizer
43
~~~~~~~~~~~~~~
@@ -19,6 +18,9 @@
1918
:copyright: (c) 2021 by Ahmed TAHRI
2019
:license: MIT, see LICENSE for more details.
2120
"""
21+
22+
from __future__ import annotations
23+
2224
import logging
2325

2426
from .api import from_bytes, from_fp, from_path, is_binary

vendor/charset_normalizer/__main__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from __future__ import annotations
2+
13
from .cli import cli_detect
24

35
if __name__ == "__main__":

vendor/charset_normalizer/api.py

Lines changed: 89 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
from __future__ import annotations
2+
13
import logging
24
from os import PathLike
3-
from typing import BinaryIO, List, Optional, Set, Union
5+
from typing import BinaryIO
46

57
from .cd import (
68
coherence_ratio,
@@ -21,8 +23,6 @@
2123
should_strip_sig_or_bom,
2224
)
2325

24-
# Will most likely be controversial
25-
# logging.addLevelName(TRACE, "TRACE")
2626
logger = logging.getLogger("charset_normalizer")
2727
explain_handler = logging.StreamHandler()
2828
explain_handler.setFormatter(
@@ -31,12 +31,12 @@
3131

3232

3333
def from_bytes(
34-
sequences: Union[bytes, bytearray],
34+
sequences: bytes | bytearray,
3535
steps: int = 5,
3636
chunk_size: int = 512,
3737
threshold: float = 0.2,
38-
cp_isolation: Optional[List[str]] = None,
39-
cp_exclusion: Optional[List[str]] = None,
38+
cp_isolation: list[str] | None = None,
39+
cp_exclusion: list[str] | None = None,
4040
preemptive_behaviour: bool = True,
4141
explain: bool = False,
4242
language_threshold: float = 0.1,
@@ -62,7 +62,7 @@ def from_bytes(
6262

6363
if not isinstance(sequences, (bytearray, bytes)):
6464
raise TypeError(
65-
"Expected object of type bytes or bytearray, got: {0}".format(
65+
"Expected object of type bytes or bytearray, got: {}".format(
6666
type(sequences)
6767
)
6868
)
@@ -76,7 +76,7 @@ def from_bytes(
7676

7777
if length == 0:
7878
logger.debug("Encoding detection on empty bytes, assuming utf_8 intention.")
79-
if explain:
79+
if explain: # Defensive: ensure exit path clean handler
8080
logger.removeHandler(explain_handler)
8181
logger.setLevel(previous_logger_level or logging.WARNING)
8282
return CharsetMatches([CharsetMatch(sequences, "utf_8", 0.0, False, [], "")])
@@ -135,9 +135,9 @@ def from_bytes(
135135
),
136136
)
137137

138-
prioritized_encodings: List[str] = []
138+
prioritized_encodings: list[str] = []
139139

140-
specified_encoding: Optional[str] = (
140+
specified_encoding: str | None = (
141141
any_specified_encoding(sequences) if preemptive_behaviour else None
142142
)
143143

@@ -149,16 +149,18 @@ def from_bytes(
149149
specified_encoding,
150150
)
151151

152-
tested: Set[str] = set()
153-
tested_but_hard_failure: List[str] = []
154-
tested_but_soft_failure: List[str] = []
152+
tested: set[str] = set()
153+
tested_but_hard_failure: list[str] = []
154+
tested_but_soft_failure: list[str] = []
155155

156-
fallback_ascii: Optional[CharsetMatch] = None
157-
fallback_u8: Optional[CharsetMatch] = None
158-
fallback_specified: Optional[CharsetMatch] = None
156+
fallback_ascii: CharsetMatch | None = None
157+
fallback_u8: CharsetMatch | None = None
158+
fallback_specified: CharsetMatch | None = None
159159

160160
results: CharsetMatches = CharsetMatches()
161161

162+
early_stop_results: CharsetMatches = CharsetMatches()
163+
162164
sig_encoding, sig_payload = identify_sig_or_bom(sequences)
163165

164166
if sig_encoding is not None:
@@ -187,7 +189,7 @@ def from_bytes(
187189

188190
tested.add(encoding_iana)
189191

190-
decoded_payload: Optional[str] = None
192+
decoded_payload: str | None = None
191193
bom_or_sig_available: bool = sig_encoding == encoding_iana
192194
strip_sig_or_bom: bool = bom_or_sig_available and should_strip_sig_or_bom(
193195
encoding_iana
@@ -221,16 +223,20 @@ def from_bytes(
221223
try:
222224
if is_too_large_sequence and is_multi_byte_decoder is False:
223225
str(
224-
sequences[: int(50e4)]
225-
if strip_sig_or_bom is False
226-
else sequences[len(sig_payload) : int(50e4)],
226+
(
227+
sequences[: int(50e4)]
228+
if strip_sig_or_bom is False
229+
else sequences[len(sig_payload) : int(50e4)]
230+
),
227231
encoding=encoding_iana,
228232
)
229233
else:
230234
decoded_payload = str(
231-
sequences
232-
if strip_sig_or_bom is False
233-
else sequences[len(sig_payload) :],
235+
(
236+
sequences
237+
if strip_sig_or_bom is False
238+
else sequences[len(sig_payload) :]
239+
),
234240
encoding=encoding_iana,
235241
)
236242
except (UnicodeDecodeError, LookupError) as e:
@@ -286,7 +292,7 @@ def from_bytes(
286292
early_stop_count: int = 0
287293
lazy_str_hard_failure = False
288294

289-
md_chunks: List[str] = []
295+
md_chunks: list[str] = []
290296
md_ratios = []
291297

292298
try:
@@ -367,7 +373,13 @@ def from_bytes(
367373
and not lazy_str_hard_failure
368374
):
369375
fallback_entry = CharsetMatch(
370-
sequences, encoding_iana, threshold, False, [], decoded_payload
376+
sequences,
377+
encoding_iana,
378+
threshold,
379+
False,
380+
[],
381+
decoded_payload,
382+
preemptive_declaration=specified_encoding,
371383
)
372384
if encoding_iana == specified_encoding:
373385
fallback_specified = fallback_entry
@@ -385,7 +397,7 @@ def from_bytes(
385397
)
386398

387399
if not is_multi_byte_decoder:
388-
target_languages: List[str] = encoding_languages(encoding_iana)
400+
target_languages: list[str] = encoding_languages(encoding_iana)
389401
else:
390402
target_languages = mb_encoding_languages(encoding_iana)
391403

@@ -421,36 +433,66 @@ def from_bytes(
421433
),
422434
)
423435

424-
results.append(
425-
CharsetMatch(
426-
sequences,
427-
encoding_iana,
428-
mean_mess_ratio,
429-
bom_or_sig_available,
430-
cd_ratios_merged,
431-
decoded_payload,
432-
)
436+
current_match = CharsetMatch(
437+
sequences,
438+
encoding_iana,
439+
mean_mess_ratio,
440+
bom_or_sig_available,
441+
cd_ratios_merged,
442+
(
443+
decoded_payload
444+
if (
445+
is_too_large_sequence is False
446+
or encoding_iana in [specified_encoding, "ascii", "utf_8"]
447+
)
448+
else None
449+
),
450+
preemptive_declaration=specified_encoding,
433451
)
434452

453+
results.append(current_match)
454+
435455
if (
436456
encoding_iana in [specified_encoding, "ascii", "utf_8"]
437457
and mean_mess_ratio < 0.1
438458
):
459+
# If md says nothing to worry about, then... stop immediately!
460+
if mean_mess_ratio == 0.0:
461+
logger.debug(
462+
"Encoding detection: %s is most likely the one.",
463+
current_match.encoding,
464+
)
465+
if explain: # Defensive: ensure exit path clean handler
466+
logger.removeHandler(explain_handler)
467+
logger.setLevel(previous_logger_level)
468+
return CharsetMatches([current_match])
469+
470+
early_stop_results.append(current_match)
471+
472+
if (
473+
len(early_stop_results)
474+
and (specified_encoding is None or specified_encoding in tested)
475+
and "ascii" in tested
476+
and "utf_8" in tested
477+
):
478+
probable_result: CharsetMatch = early_stop_results.best() # type: ignore[assignment]
439479
logger.debug(
440-
"Encoding detection: %s is most likely the one.", encoding_iana
480+
"Encoding detection: %s is most likely the one.",
481+
probable_result.encoding,
441482
)
442-
if explain:
483+
if explain: # Defensive: ensure exit path clean handler
443484
logger.removeHandler(explain_handler)
444485
logger.setLevel(previous_logger_level)
445-
return CharsetMatches([results[encoding_iana]])
486+
487+
return CharsetMatches([probable_result])
446488

447489
if encoding_iana == sig_encoding:
448490
logger.debug(
449491
"Encoding detection: %s is most likely the one as we detected a BOM or SIG within "
450492
"the beginning of the sequence.",
451493
encoding_iana,
452494
)
453-
if explain:
495+
if explain: # Defensive: ensure exit path clean handler
454496
logger.removeHandler(explain_handler)
455497
logger.setLevel(previous_logger_level)
456498
return CharsetMatches([results[encoding_iana]])
@@ -504,8 +546,8 @@ def from_fp(
504546
steps: int = 5,
505547
chunk_size: int = 512,
506548
threshold: float = 0.20,
507-
cp_isolation: Optional[List[str]] = None,
508-
cp_exclusion: Optional[List[str]] = None,
549+
cp_isolation: list[str] | None = None,
550+
cp_exclusion: list[str] | None = None,
509551
preemptive_behaviour: bool = True,
510552
explain: bool = False,
511553
language_threshold: float = 0.1,
@@ -530,12 +572,12 @@ def from_fp(
530572

531573

532574
def from_path(
533-
path: Union[str, bytes, PathLike], # type: ignore[type-arg]
575+
path: str | bytes | PathLike, # type: ignore[type-arg]
534576
steps: int = 5,
535577
chunk_size: int = 512,
536578
threshold: float = 0.20,
537-
cp_isolation: Optional[List[str]] = None,
538-
cp_exclusion: Optional[List[str]] = None,
579+
cp_isolation: list[str] | None = None,
580+
cp_exclusion: list[str] | None = None,
539581
preemptive_behaviour: bool = True,
540582
explain: bool = False,
541583
language_threshold: float = 0.1,
@@ -561,12 +603,12 @@ def from_path(
561603

562604

563605
def is_binary(
564-
fp_or_path_or_payload: Union[PathLike, str, BinaryIO, bytes], # type: ignore[type-arg]
606+
fp_or_path_or_payload: PathLike | str | BinaryIO | bytes, # type: ignore[type-arg]
565607
steps: int = 5,
566608
chunk_size: int = 512,
567609
threshold: float = 0.20,
568-
cp_isolation: Optional[List[str]] = None,
569-
cp_exclusion: Optional[List[str]] = None,
610+
cp_isolation: list[str] | None = None,
611+
cp_exclusion: list[str] | None = None,
570612
preemptive_behaviour: bool = True,
571613
explain: bool = False,
572614
language_threshold: float = 0.1,

0 commit comments

Comments
 (0)