23
23
to focusing to streaming test case
24
24
25
25
"""
26
- from fileinput import input
27
- from glob import glob
28
26
from itertools import chain
29
27
import os
30
- import re
31
- import shutil
32
- import subprocess
33
- import sys
34
- import tempfile
35
28
import time
36
29
import unittest
37
- import zipfile
38
30
import operator
39
31
40
32
from pyspark .context import SparkContext
44
36
45
37
SPARK_HOME = os .environ ["SPARK_HOME" ]
46
38
39
+
47
40
class StreamOutput :
48
41
"""
49
42
a class to store the output from stream
50
43
"""
51
44
result = list ()
52
45
46
+
53
47
class PySparkStreamingTestCase (unittest .TestCase ):
54
48
def setUp (self ):
55
49
class_name = self .__class__ .__name__
@@ -69,6 +63,7 @@ def tearDownClass(cls):
69
63
time .sleep (5 )
70
64
SparkContext ._gateway ._shutdown_callback_server ()
71
65
66
+
72
67
class TestBasicOperationsSuite (PySparkStreamingTestCase ):
73
68
"""
74
69
The input and output of this TestBasicOperationsSuite are equivalent to
@@ -77,7 +72,7 @@ class TestBasicOperationsSuite(PySparkStreamingTestCase):
77
72
def setUp (self ):
78
73
PySparkStreamingTestCase .setUp (self )
79
74
StreamOutput .result = list ()
80
- self .timeout = 10 # seconds
75
+ self .timeout = 10 # seconds
81
76
82
77
def tearDown (self ):
83
78
PySparkStreamingTestCase .tearDown (self )
@@ -88,7 +83,8 @@ def tearDownClass(cls):
88
83
89
84
def test_map (self ):
90
85
"""Basic operation test for DStream.map"""
91
- test_input = [range (1 ,5 ), range (5 ,9 ), range (9 , 13 )]
86
+ test_input = [range (1 , 5 ), range (5 , 9 ), range (9 , 13 )]
87
+
92
88
def test_func (dstream ):
93
89
return dstream .map (lambda x : str (x ))
94
90
expected_output = map (lambda x : map (lambda y : str (y ), x ), test_input )
@@ -97,17 +93,19 @@ def test_func(dstream):
97
93
98
94
def test_flatMap (self ):
99
95
"""Basic operation test for DStream.faltMap"""
100
- test_input = [range (1 ,5 ), range (5 ,9 ), range (9 , 13 )]
96
+ test_input = [range (1 , 5 ), range (5 , 9 ), range (9 , 13 )]
97
+
101
98
def test_func (dstream ):
102
99
return dstream .flatMap (lambda x : (x , x * 2 ))
103
100
expected_output = map (lambda x : list (chain .from_iterable ((map (lambda y : [y , y * 2 ], x )))),
104
- test_input )
101
+ test_input )
105
102
output = self ._run_stream (test_input , test_func , expected_output )
106
103
self .assertEqual (expected_output , output )
107
104
108
105
def test_filter (self ):
109
106
"""Basic operation test for DStream.filter"""
110
- test_input = [range (1 ,5 ), range (5 ,9 ), range (9 , 13 )]
107
+ test_input = [range (1 , 5 ), range (5 , 9 ), range (9 , 13 )]
108
+
111
109
def test_func (dstream ):
112
110
return dstream .filter (lambda x : x % 2 == 0 )
113
111
expected_output = map (lambda x : filter (lambda y : y % 2 == 0 , x ), test_input )
@@ -116,7 +114,8 @@ def test_func(dstream):
116
114
117
115
def test_count (self ):
118
116
"""Basic operation test for DStream.count"""
119
- test_input = [[], [1 ], range (1 , 3 ), range (1 ,4 ), range (1 ,5 )]
117
+ test_input = [[], [1 ], range (1 , 3 ), range (1 , 4 ), range (1 , 5 )]
118
+
120
119
def test_func (dstream ):
121
120
return dstream .count ()
122
121
expected_output = map (lambda x : [len (x )], test_input )
@@ -125,7 +124,8 @@ def test_func(dstream):
125
124
126
125
def test_reduce (self ):
127
126
"""Basic operation test for DStream.reduce"""
128
- test_input = [range (1 ,5 ), range (5 ,9 ), range (9 , 13 )]
127
+ test_input = [range (1 , 5 ), range (5 , 9 ), range (9 , 13 )]
128
+
129
129
def test_func (dstream ):
130
130
return dstream .reduce (operator .add )
131
131
expected_output = map (lambda x : [reduce (operator .add , x )], test_input )
@@ -135,19 +135,20 @@ def test_func(dstream):
135
135
def test_reduceByKey (self ):
136
136
"""Basic operation test for DStream.reduceByKey"""
137
137
test_input = [["a" , "a" , "b" ], ["" , "" ], []]
138
+
138
139
def test_func (dstream ):
139
140
return dstream .map (lambda x : (x , 1 )).reduceByKey (operator .add )
140
- expected_output = [[("a" , 2 ), ("b" , 1 )],[("" , 2 )], []]
141
+ expected_output = [[("a" , 2 ), ("b" , 1 )], [("" , 2 )], []]
141
142
output = self ._run_stream (test_input , test_func , expected_output )
142
143
self .assertEqual (expected_output , output )
143
144
144
145
def _run_stream (self , test_input , test_func , expected_output ):
145
146
"""Start stream and return the output"""
146
147
# Generate input stream with user-defined input
147
148
test_input_stream = self .ssc ._testInputStream (test_input )
148
- # Applyed test function to stream
149
+ # Applied test function to stream
149
150
test_stream = test_func (test_input_stream )
150
- # Add job to get outpuf from stream
151
+ # Add job to get output from stream
151
152
test_stream ._test_output (StreamOutput .result )
152
153
self .ssc .start ()
153
154
0 commit comments