# Hexadecimal HTML character references such as "&#x1F;"; group 1 captures the hex digits.
HEX_PATTERN = re.compile(r'&#x([\da-fA-F]+);')
# One or more consecutive single-quote characters.
QUOTE_PATTERN = re.compile(r'[\']+')
# Runs of anything other than ASCII letters, ASCII digits and the hyphen.
DISALLOWED_CHARS_PATTERN = re.compile(r'[^-a-zA-Z0-9]+')
# Runs of non-word characters and underscores. On str input, \w is Unicode-aware,
# so non-ASCII letters and digits are kept; this is the default pattern used by
# slugify() when allow_unicode is true.
DISALLOWED_UNICODE_CHARS_PATTERN = re.compile(r'[\W_]+')
# Two or more consecutive hyphens.
DUPLICATE_DASH_PATTERN = re.compile(r'-{2,}')
# A comma sitting directly between two digits (e.g. the commas in "1,000,000").
NUMBERS_PATTERN = re.compile(r'(?<=\d),(?=\d)')
# Default separator character for slugs.
DEFAULT_SEPARATOR = '-'
@@ -66,7 +67,8 @@ def smart_truncate(string, max_length=0, word_boundary=False, separator=' ', sav
6667
6768def slugify (text , entities = True , decimal = True , hexadecimal = True , max_length = 0 , word_boundary = False ,
6869 separator = DEFAULT_SEPARATOR , save_order = False , stopwords = (), regex_pattern = None , lowercase = True ,
69- replacements : typing .Iterable [typing .Iterable [str ]] = ()):
70+ replacements : typing .Iterable [typing .Iterable [str ]] = (),
71+ allow_unicode = False ):
7072 """
7173 Make a slug from the given text.
7274 :param text (str): initial text
@@ -81,6 +83,7 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
8183 :param regex_pattern (str): regex pattern for disallowed characters
8284 :param lowercase (bool): activate case sensitivity by setting it to False
8385 :param replacements (iterable): list of replacement rules e.g. [['|', 'or'], ['%', 'percent']]
86+ :param allow_unicode (bool): allow unicode characters
8487 :return (str):
8588 """
8689
@@ -97,7 +100,8 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
97100 text = QUOTE_PATTERN .sub (DEFAULT_SEPARATOR , text )
98101
99102 # decode unicode
100- text = unidecode .unidecode (text )
103+ if not allow_unicode :
104+ text = unidecode .unidecode (text )
101105
102106 # ensure text is still in unicode
103107 if not isinstance (text , str ):
@@ -122,7 +126,11 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
122126 pass
123127
124128 # translate
125- text = unicodedata .normalize ('NFKD' , text )
129+ if allow_unicode :
130+ text = unicodedata .normalize ('NFKC' , text )
131+ else :
132+ text = unicodedata .normalize ('NFKD' , text )
133+
126134 if sys .version_info < (3 ,):
127135 text = text .encode ('ascii' , 'ignore' )
128136
@@ -137,7 +145,11 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w
137145 text = NUMBERS_PATTERN .sub ('' , text )
138146
139147 # replace all other unwanted characters
140- pattern = regex_pattern or DISALLOWED_CHARS_PATTERN
148+ if allow_unicode :
149+ pattern = regex_pattern or DISALLOWED_UNICODE_CHARS_PATTERN
150+ else :
151+ pattern = regex_pattern or DISALLOWED_CHARS_PATTERN
152+
141153 text = re .sub (pattern , DEFAULT_SEPARATOR , text )
142154
143155 # remove redundant
0 commit comments