|
1 |
| -# Things to Remember |
2 |
| -# |
3 |
| -# Python only supports a single constructor per class, the __init__ method. |
4 |
| -# |
5 |
| -# Use @classmethod to define alternative constructors for your classes. They are like |
6 |
| -# "factory methods" in Java. |
7 |
| -# |
8 |
| -# Use class method polymorphism to provide "generic" ways to build and connect concrete subclasses. |
| 1 | +""" |
| 2 | +Consider "itertools" for working with iterators and generators |
| 3 | +""" |
9 | 4 |
|
| 5 | +import itertools |
10 | 6 |
|
11 |
| -import os |
12 |
| -from threading import Thread |
| 7 | +# 1. linking iterators together |
| 8 | +it = itertools.chain([1, 2, 3], [4, 5, 6]) |
| 9 | +print(f"chain: {list(it)}") |
13 | 10 |
|
| 11 | +it1 = [i * 3 for i in ("a", "b", "c")] |
| 12 | +it2 = [i * 3 for i in ("x", "y", "z")] |
| 13 | +# nested_it is an iterable of iterables (here, a list of two lists) |
| 14 | +nested_it = [it1, it2] |
| 15 | +output_it = itertools.chain.from_iterable(nested_it) |
| 16 | +print(f"output_it: {list(output_it)}") |
14 | 17 |
|
15 |
| -class InputData(object): |
16 |
| - def read(self): |
17 |
| - raise NotImplementedError |
| 18 | +it = itertools.repeat("hello", 3) |
| 19 | +print(list(it)) |
18 | 20 |
|
19 |
| - @classmethod |
20 |
| - def generate_inputs(cls, config): |
21 |
| - raise NotImplementedError |
| 21 | +it = itertools.cycle([1, 2]) |
| 22 | +result = [next(it) for _ in range(10)] |
| 23 | +print(result) |
22 | 24 |
|
| 25 | +# tee splits a single iterator into N parallel iterators |
| 26 | +it1, it2, it3 = itertools.tee(["first", "second"], 3) |
| 27 | +print(list(it1)) |
| 28 | +print(list(it2)) |
| 29 | +print(list(it3)) |
23 | 30 |
|
24 |
| -class PathInputData(InputData): |
25 |
| - def __init__(self, path): |
26 |
| - super().__init__() |
27 |
| - self.path = path |
| 31 | +# zip_longest |
| 32 | +keys = ["one", "two", "three"] |
| 33 | +values = [1, 2] |
| 34 | +normal = list(zip(keys, values)) |
| 35 | +print(f"zip: {normal}") |
| 36 | +longest = list(itertools.zip_longest(keys, values, fillvalue="nope")) |
| 37 | +print(f"zip_longest: {longest}") |
28 | 38 |
|
29 |
| - def read(self): |
30 |
| - return open(self.path).read() |
| 39 | +# 2. filtering items from an iterator |
31 | 40 |
|
32 |
| - @classmethod |
33 |
| - def generate_inputs(cls, config): |
34 |
| - data_dir = config["data_dir"] |
35 |
| - for name in os.listdir(data_dir): |
36 |
| - yield cls(os.path.join(data_dir, name)) |
| 41 | +# islice slices an iterator by numerical indexes without copying, similar to standard slicing |
| 42 | +values = list(i + 1 for i in range(10)) |
| 43 | +first_five = itertools.islice(values, 5) |
| 44 | +print(f"first_five: {list(first_five)}") |
| 45 | +middle_odds = itertools.islice(values, 2, 8, 2) |
| 46 | +print(f"middle_odds: {list(middle_odds)}") |
37 | 47 |
|
| 48 | +values = list(i + 1 for i in range(10)) |
| 49 | +less_than_seven = itertools.takewhile(lambda x: x < 7, values) |
| 50 | +print(f"less than seven: {list(less_than_seven)}") |
38 | 51 |
|
39 |
| -class Worker(object): |
40 |
| - def __init__(self, input_data): |
41 |
| - self.input_data = input_data |
42 |
| - self.result = None |
| 52 | +values = list(i + 1 for i in range(10)) |
| 53 | +more_than_seven = itertools.dropwhile(lambda x: x < 7, values) |
| 54 | +print(f"more than seven: {list(more_than_seven)}") |
43 | 55 |
|
44 |
| - def map(self): |
45 |
| - raise NotImplementedError |
| 56 | +odd_nums = itertools.filterfalse(lambda x: x % 2 == 0, values) |
| 57 | +print(f"odd_nums: {list(odd_nums)}") |
46 | 58 |
|
47 |
| - def reduce(self, other): |
48 |
| - raise NotImplementedError |
| 59 | +# 3. produce combinations of items from iterators |
49 | 60 |
|
50 |
| - @classmethod |
51 |
| - def create_workers(cls, input_class, config): |
52 |
| - workers = [] |
53 |
| - for input_data in input_class.generate_inputs(config): |
54 |
| - workers.append(cls(input_data)) |
55 |
| - return workers |
| 61 | +it = itertools.batched([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 3) |
| 62 | +print(f"batched: {list(it)}") |
56 | 63 |
|
| 64 | +route = ["Los Angeles", "Bakersfield", "Modesto", "Sacramento"] |
| 65 | +it = itertools.pairwise(route) |
| 66 | +print(f"pairwise: {list(it)}") |
57 | 67 |
|
58 |
| -class LineCountWorker(Worker): |
59 |
| - def map(self): |
60 |
| - data = self.input_data.read() |
61 |
| - self.result = data.count("\n") |
| 68 | +values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] |
| 69 | +sum_reduce = itertools.accumulate(values) |
| 70 | +print(f"sum_reduce: {list(sum_reduce)}") |
62 | 71 |
|
63 |
| - def reduce(self, other): |
64 |
| - self.result += other.result |
65 | 72 |
|
| 73 | +def sum_modulo_20(first, second): |
| 74 | + output = first + second |
| 75 | + return output % 20 |
66 | 76 |
|
67 |
| -def generate_inputs(data_dir): |
68 |
| - for name in os.listdir(data_dir): |
69 |
| - yield PathInputData(os.path.join(data_dir, name)) |
70 | 77 |
|
| 78 | +module_reduce = itertools.accumulate(values, sum_modulo_20) |
| 79 | +print(f"modulo_reduce: {list(module_reduce)}") |
71 | 80 |
|
72 |
| -def create_workers(input_list): |
73 |
| - return [LineCountWorker(input_data) for input_data in input_list] |
| 81 | +single = itertools.product([1, 2], repeat=2) |
| 82 | +print(f"single: {list(single)}") |
| 83 | +multiple = itertools.product([1, 2], ["a", "b"]) |
| 84 | +print(f"multiple: {list(multiple)}") |
74 | 85 |
|
| 86 | +it = itertools.permutations([1, 2, 3, 4], 2) |
| 87 | +print(f"permutations: {list(it)}") |
75 | 88 |
|
76 |
| -def execute(workers): |
77 |
| - threads = [Thread(target=w.map) for w in workers] |
78 |
| - for thread in threads: |
79 |
| - thread.start() |
80 |
| - for thread in threads: |
81 |
| - thread.join() |
| 89 | +it = itertools.combinations([1, 2, 3, 4], 2) |
| 90 | +print(f"combinations: {list(it)}") |
82 | 91 |
|
83 |
| - first, rest = workers[0], workers[1:] |
84 |
| - for worker in rest: |
85 |
| - first.reduce(worker) |
86 |
| - return first.result |
87 |
| - |
88 |
| - |
89 |
| -def map_reduce(worker_class, input_class, config): |
90 |
| - """ |
91 |
| - The beauty of map_reduce is that it's generic. You can pass in different |
92 |
| - worker_class and input_class, and it will glue them together! |
93 |
| - """ |
94 |
| - workers = worker_class.create_workers(input_class, config) |
95 |
| - return execute(workers) |
96 |
| - |
97 |
| - |
98 |
| -def main(): |
99 |
| - current_dir = os.path.dirname(os.path.abspath(__file__)) |
100 |
| - config = {"data_dir": current_dir} |
101 |
| - result = map_reduce(LineCountWorker, PathInputData, config) |
102 |
| - print("There are %s lines" % result) |
103 |
| - |
104 |
| - |
105 |
| -if __name__ == "__main__": |
106 |
| - main() |
| 92 | +it = itertools.combinations_with_replacement([1, 2, 3, 4], 2) |
| 93 | +print(f"combinations_with_replacement: {list(it)}") |
0 commit comments