|
19 | 19 | Unit tests for PySpark; additional tests are implemented as doctests in
|
20 | 20 | individual modules.
|
21 | 21 |
|
22 |
| -This file will merged to tests.py. But for now, this file is separated to |
23 |
| -focus to streaming test case |
| 22 | +This file will be merged into tests.py. For now, it is kept separate |
| 23 | +so that it can focus on the streaming test cases. |
24 | 24 |
|
25 | 25 | """
|
26 | 26 | from fileinput import input
|
27 | 27 | from glob import glob
|
| 28 | +from itertools import chain |
28 | 29 | import os
|
29 | 30 | import re
|
30 | 31 | import shutil
|
|
41 | 42 |
|
42 | 43 | SPARK_HOME = os.environ["SPARK_HOME"]
|
43 | 44 |
|
class buff:
    """
    Holder object handed to the stream's test output job.

    ``result`` starts out as ``None``; the tests poll it until it is set.
    """
    result = None
44 | 50 |
|
class PySparkStreamingTestCase(unittest.TestCase):
    """Base test case that gives every test its own StreamingContext."""

    def setUp(self):
        """Create a StreamingContext named after the concrete test class."""
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("set up")
        class_name = self.__class__.__name__
        self.ssc = StreamingContext(appName=class_name, duration=Seconds(1))

    def tearDown(self):
        """Stop the StreamingContext created in setUp, then pause."""
        print("tear down")
        self.ssc.stop()
        # NOTE(review): the 10 second pause presumably gives the stopped
        # context time to shut down before the next test starts -- confirm.
        time.sleep(10)
| 61 | + |
class TestBasicOperationsSuite(PySparkStreamingTestCase):
    """Tests for basic DStream transformations over fixed input batches."""

    def setUp(self):
        """Start from a fresh context and an empty output buffer."""
        PySparkStreamingTestCase.setUp(self)
        # Clear whatever a previous test left in the shared buffer.
        buff.result = None
        self.timeout = 10  # seconds

    def tearDown(self):
        PySparkStreamingTestCase.tearDown(self)

    def test_map(self):
        """map() should apply the function to every element of each batch."""
        test_input = [range(1, 5), range(5, 9), range(9, 13)]

        def test_func(dstream):
            return dstream.map(lambda x: str(x))

        # map() works element-wise, so the expectation converts each item
        # inside each batch (not each batch as a whole).
        # NOTE(review): assumes the collected output is one list per batch,
        # matching the shape used by test_flatMap -- confirm.
        expected = [[str(x) for x in batch] for batch in test_input]
        output = self.run_stream(test_input, test_func)
        self.assertEqual(output, expected)

    def test_flatMap(self):
        """flatMap() should flatten the (x, 2x) pairs within each batch."""
        test_input = [range(1, 5), range(5, 9), range(9, 13)]

        def test_func(dstream):
            return dstream.flatMap(lambda x: (x, x * 2))

        expected = [list(chain.from_iterable((x, x * 2) for x in batch))
                    for batch in test_input]
        output = self.run_stream(test_input, test_func)
        self.assertEqual(output, expected)

    def run_stream(self, test_input, test_func):
        """
        Feed ``test_input`` through ``test_func`` and return the output.

        :param test_input: input data passed to ``ssc._testInputStream``
        :param test_func: function taking the input stream and returning
                          the stream under test
        :return: ``buff.result``; still ``None`` if nothing was produced
                 within ``self.timeout`` seconds
        """
        # Generate input stream with user-defined input
        test_input_stream = self.ssc._testInputStream(test_input)
        # Apply the test function to the stream
        test_stream = test_func(test_input_stream)
        # Add a job to get the output from the stream
        test_stream._test_output(buff)
        self.ssc.start()

        start_time = time.time()
        while True:
            current_time = time.time()
            # Give up once the timeout has elapsed
            if (current_time - start_time) > self.timeout:
                self.ssc.stop()
                break
            # NOTE(review): 50 is presumably a wait in milliseconds -- confirm.
            self.ssc.awaitTermination(50)
            if buff.result is not None:
                break
        return buff.result
56 | 108 |
|
57 | 109 | if __name__ == "__main__":
|
58 | 110 | unittest.main()
|
0 commit comments