Skip to content

Commit b3a8435

Browse files
committed
Merge pull request #252 from behrtam/add-unicode-test-word-count
word-count: Add test case for unicode support
2 parents cf98227 + 4f4d22d commit b3a8435

File tree

2 files changed

+24
-1
lines changed

2 files changed

+24
-1
lines changed

word-count/example.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,15 @@
11
from collections import Counter
22

33

4+
# to be backwards compatible with the old Python 2.X
5+
def decode_if_needed(string):
    """Return *string* as text, decoding it from UTF-8 when it is bytes.

    On Python 2 a plain ``str`` literal is a byte string and gets decoded
    to ``unicode``; on Python 3 ``str`` is already text and is returned
    unchanged.

    Args:
        string: A byte string (``bytes``/``bytearray``) or a text string.

    Returns:
        The decoded text for byte input; any other object exactly as given.

    Raises:
        UnicodeDecodeError: If byte input is not valid UTF-8.
    """
    # Check the type rather than probing for ``.decode``: Python 2's
    # ``unicode`` also has ``.decode``, and calling it implicitly encodes
    # to ASCII first, raising UnicodeEncodeError for non-ASCII text.
    if isinstance(string, (bytes, bytearray)):
        return string.decode('utf-8')
    return string
10+
11+
412
def word_count(text):
    """Count how often each word occurs in *text*.

    The input is first passed through ``decode_if_needed``. Every character
    for which ``isalnum()`` is false is then treated as a word separator,
    and all remaining characters are lower-cased, so counting is
    case-insensitive.

    Args:
        text: The text to analyse.

    Returns:
        A ``collections.Counter`` mapping each lower-cased word to its
        number of occurrences.
    """
    decoded = decode_if_needed(text)
    # Replace anything that is not alphanumeric with a space so that a
    # plain ``split()`` yields the words regardless of the punctuation used.
    normalized = ''.join(
        c.lower() if c.isalnum() else ' ' for c in decoded
    )
    return Counter(normalized.split())

word-count/word_count_test.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,17 @@
1+
# -*- coding: utf-8 -*-
12
import unittest
23

34
from wordcount import word_count
45

56

7+
# to be backwards compatible with the old Python 2.X
8+
def decode_if_needed(string):
    """Return *string* as text, decoding it from UTF-8 when it is bytes.

    Lets the tests build expected values that are text on both Python 2
    (where a plain ``str`` literal is a byte string) and Python 3 (where
    ``str`` is already text and is returned unchanged).

    Args:
        string: A byte string (``bytes``/``bytearray``) or a text string.

    Returns:
        The decoded text for byte input; any other object exactly as given.

    Raises:
        UnicodeDecodeError: If byte input is not valid UTF-8.
    """
    # Check the type rather than probing for ``.decode``: Python 2's
    # ``unicode`` also has ``.decode``, and calling it implicitly encodes
    # to ASCII first, raising UnicodeEncodeError for non-ASCII text.
    if isinstance(string, (bytes, bytearray)):
        return string.decode('utf-8')
    return string
13+
14+
615
class WordCountTests(unittest.TestCase):
716

817
def test_count_one_word(self):
@@ -69,5 +78,11 @@ def test_non_alphanumeric(self):
6978
word_count('hey,my_spacebar_is_broken.')
7079
)
7180

81+
def test_unicode(self):
    """Check that non-ASCII words are counted.

    The expected result treats the emoji and the trailing '!' as
    separators between the two Cyrillic words.
    """
    expected = {
        decode_if_needed('до'): 1,
        decode_if_needed('свидания'): 1,
    }
    actual = word_count('до🖖свидания!')
    self.assertEqual(expected, actual)
86+
7287
if __name__ == '__main__':
7388
unittest.main()

0 commit comments

Comments
 (0)