Skip to content

Commit

Permalink
Change normalize_encoding() to avoid using .translate() or depending on
Browse files Browse the repository at this point in the history
the string type.  It will always return a Unicode string.  The algorithm's
specification is unchanged.
  • Loading branch information
gvanrossum committed Jun 7, 2007
1 parent c3b6ac7 commit ad5b9de
Showing 1 changed file with 11 additions and 14 deletions.
25 changes: 11 additions & 14 deletions Lib/encodings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,6 @@
_cache = {}
_unknown = '--unknown--'
_import_tail = ['*']
_norm_encoding_map = (' . '
'0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ '
' abcdefghijklmnopqrstuvwxyz '
' '
' '
' ')
_aliases = aliases.aliases

class CodecRegistryError(LookupError, SystemError):
Expand All @@ -58,14 +52,17 @@ def normalize_encoding(encoding):
non-ASCII characters, these must be Latin-1 compatible.
"""
# Make sure we have an 8-bit string, because .translate() works
# differently for Unicode strings.
if isinstance(encoding, str):
# Note that .encode('latin-1') does *not* use the codec
# registry, so this call doesn't recurse. (See unicodeobject.c
# PyUnicode_AsEncodedString() for details)
encoding = encoding.encode('latin-1')
return '_'.join(encoding.translate(_norm_encoding_map).split())
chars = []
punct = False
for c in encoding:
if c.isalnum() or c == '.':
if punct and chars:
chars.append('_')
chars.append(c)
punct = False
else:
punct = True
return ''.join(chars)

def search_function(encoding):

Expand Down

0 comments on commit ad5b9de

Please sign in to comment.