forked from smythp/python-201
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfunctional.py
66 lines (42 loc) · 1.8 KB
/
functional.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def remove_characters(string, unwanted_character_list):
    """Return *string* with every character found in *unwanted_character_list* removed.

    Parameters:
        string: the input text to filter.
        unwanted_character_list: iterable of single characters to drop.

    Returns:
        A new string containing only the characters not in
        *unwanted_character_list*; the input is never modified.
    """
    # str.join over a generator builds the result in one pass, avoiding
    # the potentially-quadratic repeated `out_string += character` pattern.
    return ''.join(
        character for character in string
        if character not in unwanted_character_list
    )
def clean_string(string):
    """Prepare a string for tokenization.

    Strips sentence punctuation (periods and commas) and lowercases
    the result.
    """
    # Remove punctuation first, then normalize case in one expression.
    without_punctuation = remove_characters(string, ['.', ','])
    return without_punctuation.lower()
def tokenize(string, preprocess=False):
    """Split *string* into a list of whitespace-separated words.

    If *preprocess* is True, the string is first run through
    clean_string (punctuation removal + lowercasing).
    """
    if preprocess:
        string = clean_string(string)
    # str.split with no arguments splits on any run of whitespace.
    return string.split()
def count_word_occurances(word_list, word_to_match):
    """Return the number of occurrences of *word_to_match* in *word_list*.

    Parameters:
        word_list: list of word strings (e.g. the output of tokenize).
        word_to_match: the exact word to count (case-sensitive).

    Returns:
        The integer count of matching entries.

    NOTE: the function name keeps the historical misspelling of
    "occurrences" so existing callers are not broken.
    """
    # list.count is the stdlib equivalent of the manual counting loop.
    return word_list.count(word_to_match)
def words_matching_first_character(word_list, match_character):
    """Return the words in *word_list* whose first character equals *match_character*.

    Parameters:
        word_list: list of word strings.
        match_character: the single character to compare against each
            word's first character (case-sensitive).

    Returns:
        A new list of the matching words, in their original order.
    """
    # Guard with `word and ...`: the original `word[0]` raised IndexError
    # when the list contained an empty string; empty words simply don't match.
    return [word for word in word_list if word and word[0] == match_character]
if __name__ == '__main__':
    original_text = "Everything should be built top-down, except the first time."
    tokens = tokenize(original_text, True)
    # Compute the e-words once instead of calling the helper twice.
    e_words = words_matching_first_character(tokens, 'e')
    print("Total words:", len(tokens))
    print('Number of occurances of word match:',
          count_word_occurances(tokens, 'except'))
    print("Words beginning with character:", e_words)
    print("Number of words beginning with character:", len(e_words))