Commit 0d30109

fixed pep8 violation
1 parent b7dab85 commit 0d30109

File tree

7 files changed: +41 -31 lines changed


python/pyspark/streaming/context.py

Lines changed: 5 additions & 5 deletions
@@ -33,8 +33,8 @@ class StreamingContext(object):
     """
 
     def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
-        environment=None, batchSize=1024, serializer=PickleSerializer(), conf=None,
-        gateway=None, sparkContext=None, duration=None):
+                 environment=None, batchSize=1024, serializer=PickleSerializer(), conf=None,
+                 gateway=None, sparkContext=None, duration=None):
         """
         Create a new StreamingContext. At least the master and app name and duration
         should be set, either through the named parameters here or through C{conf}.

@@ -63,8 +63,8 @@ def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
         if sparkContext is None:
             # Create the Python Sparkcontext
             self._sc = SparkContext(master=master, appName=appName, sparkHome=sparkHome,
-                pyFiles=pyFiles, environment=environment, batchSize=batchSize,
-                serializer=serializer, conf=conf, gateway=gateway)
+                                    pyFiles=pyFiles, environment=environment, batchSize=batchSize,
+                                    serializer=serializer, conf=conf, gateway=gateway)
         else:
             self._sc = sparkContext

@@ -107,7 +107,7 @@ def awaitTermination(self, timeout=None):
         else:
             self._jssc.awaitTermination(timeout)
 
-    #TODO: add storageLevel
+    # TODO: add storageLevel
     def socketTextStream(self, hostname, port):
         """
         Create an input from TCP source hostname:port. Data is received using

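As a point of reference for the signature reformatted above, here is a minimal usage sketch of this experimental streaming API. It assumes the `Seconds` helper from `pyspark.streaming.duration` (touched later in this commit) and an `ssc.start()` method, neither of which appears in this hunk; the host, port, and timeout values are illustrative only.

    from pyspark.streaming.context import StreamingContext
    from pyspark.streaming.duration import Seconds

    # Build a context with a 1-second batch duration; it creates its own
    # SparkContext because no existing sparkContext is passed in.
    ssc = StreamingContext(master="local[2]", appName="NetworkWordCount",
                           duration=Seconds(1))

    # Receive lines of text from a TCP source (storageLevel is still a TODO above).
    lines = ssc.socketTextStream("localhost", 9999)

    ssc.start()                  # assumed entry point, not shown in this diff
    ssc.awaitTermination(60000)  # wait with a timeout, as in the hunk above
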
python/pyspark/streaming/dstream.py

Lines changed: 5 additions & 4 deletions
@@ -75,7 +75,8 @@ def filter(self, f):
         """
         Return a new DStream containing only the elements that satisfy predicate.
         """
-        def func(iterator): return ifilter(f, iterator)
+        def func(iterator):
+            return ifilter(f, iterator)
         return self.mapPartitions(func)
 
     def flatMap(self, f, preservesPartitioning=False):

@@ -130,7 +131,7 @@ def reduceByKey(self, func, numPartitions=None):
         return self.combineByKey(lambda x: x, func, func, numPartitions)
 
     def combineByKey(self, createCombiner, mergeValue, mergeCombiners,
-                     numPartitions = None):
+                     numPartitions=None):
         """
         Count the number of elements for each key, and return the result to the
         master as a dictionary

@@ -153,7 +154,7 @@ def combineLocally(iterator):
         def _mergeCombiners(iterator):
             combiners = {}
             for (k, v) in iterator:
-                if not k in combiners:
+                if k not in combiners:
                     combiners[k] = v
                 else:
                     combiners[k] = mergeCombiners(combiners[k], v)

@@ -188,7 +189,7 @@ def add_shuffle_key(split, iterator):
         keyed._bypass_serializer = True
         with _JavaStackTrace(self.ctx) as st:
             partitioner = self.ctx._jvm.PythonPartitioner(numPartitions,
-                id(partitionFunc))
+                                                          id(partitionFunc))
             jdstream = self.ctx._jvm.PythonPairwiseDStream(keyed._jdstream.dstream(),
                                                            partitioner).asJavaDStream()
         dstream = DStream(jdstream, self._ssc, BatchedSerializer(outputSerializer))

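For context on the `k not in combiners` fix above, the surrounding `_mergeCombiners` step folds per-partition (key, combiner) pairs into one combiner per key. A standalone sketch of that logic over plain iterables (the `merge_combiners` name and the example data are illustrative, not part of the diff):

    from operator import add

    def merge_combiners(pairs, merge):
        # Fold (key, combiner) pairs into a dict, merging values that share a key.
        combiners = {}
        for (k, v) in pairs:
            if k not in combiners:   # PEP8-preferred spelling of `not k in combiners`
                combiners[k] = v
            else:
                combiners[k] = merge(combiners[k], v)
        return combiners

    # Partial counts arriving from two partitions collapse to one total per key.
    print(merge_combiners([("a", 2), ("b", 1), ("a", 3)], add))  # {'a': 5, 'b': 1}
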
python/pyspark/streaming/duration.py

Lines changed: 3 additions & 0 deletions
@@ -333,6 +333,7 @@ def _is_duration(self, instance):
         if not isinstance(instance, Duration):
             raise TypeError("This should be Duration")
 
+
 def Milliseconds(milliseconds):
     """
     Helper function that creates instance of [[pysparkstreaming.duration]] representing

@@ -346,6 +347,7 @@ def Milliseconds(milliseconds):
     """
     return Duration(milliseconds)
 
+
 def Seconds(seconds):
     """
     Helper function that creates instance of [[pysparkstreaming.duration]] representing

@@ -359,6 +361,7 @@ def Seconds(seconds):
     """
     return Duration(seconds * 1000)
 
+
 def Minutes(minutes):
     """
     Helper function that creates instance of [[pysparkstreaming.duration]] representing

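The helpers being separated above are thin wrappers that normalize everything to milliseconds, as the visible return statements show. A small usage sketch, assuming `Duration` supports equality comparison (not shown in this diff) and that `Minutes` scales analogously to `Seconds`:

    from pyspark.streaming.duration import Duration, Milliseconds, Seconds, Minutes

    assert Milliseconds(500) == Duration(500)
    assert Seconds(2) == Duration(2000)       # Seconds(s) returns Duration(s * 1000)
    assert Minutes(1) == Duration(60 * 1000)  # assumed, by analogy with Seconds
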
python/pyspark/streaming/tests.py

Lines changed: 24 additions & 19 deletions
@@ -32,9 +32,9 @@
 import sys
 
 if sys.version_info[:2] <= (2, 6):
-    import unittest2 as unittest
-else:
-    import unittest
+    import unittest2 as unittest
+else:
+    import unittest
 
 from pyspark.context import SparkContext
 from pyspark.streaming.context import StreamingContext

@@ -57,7 +57,7 @@ def tearDown(self):
 
     @classmethod
     def tearDownClass(cls):
-        # Make sure tp shutdown the callback server
+        # Make sure tp shutdown the callback server
         SparkContext._gateway._shutdown_callback_server()
 
 

@@ -71,7 +71,7 @@ class TestBasicOperationsSuite(PySparkStreamingTestCase):
 
     All tests input should have list of lists(3 lists are default). This list represents stream.
     Every batch interval, the first object of list are chosen to make DStream.
-    e.g The first list in the list is input of the first batch.
+    e.g The first list in the list is input of the first batch.
     Please see the BasicTestSuits in Scala which is close to this implementation.
     """
     def setUp(self):

@@ -112,7 +112,7 @@ def test_flatMap_batch(self):
 
         def test_func(dstream):
            return dstream.flatMap(lambda x: (x, x * 2))
-        expected_output = map(lambda x: list(chain.from_iterable((map(lambda y: [y, y * 2], x)))),
+        expected_output = map(lambda x: list(chain.from_iterable((map(lambda y: [y, y * 2], x)))),
                              test_input)
         output = self._run_stream(test_input, test_func, expected_output)
         self.assertEqual(expected_output, output)

@@ -191,12 +191,12 @@ def test_func(dstream):
     def test_reduceByKey_batch(self):
         """Basic operation test for DStream.reduceByKey with batch deserializer."""
         test_input = [[("a", 1), ("a", 1), ("b", 1), ("b", 1)],
-                      [("", 1),("", 1), ("", 1), ("", 1)],
+                      [("", 1), ("", 1), ("", 1), ("", 1)],
                       [(1, 1), (1, 1), (2, 1), (2, 1), (3, 1)]]
 
         def test_func(dstream):
             return dstream.reduceByKey(operator.add)
-        expected_output = [[("a", 2), ("b", 2)], [("", 4)], [(1, 2), (2, 2), (3 ,1)]]
+        expected_output = [[("a", 2), ("b", 2)], [("", 4)], [(1, 2), (2, 2), (3, 1)]]
         output = self._run_stream(test_input, test_func, expected_output)
         for result in (output, expected_output):
             self._sort_result_based_on_key(result)

@@ -216,13 +216,13 @@ def test_func(dstream):
 
     def test_mapValues_batch(self):
         """Basic operation test for DStream.mapValues with batch deserializer."""
-        test_input = [[("a", 2), ("b", 2), ("c", 1), ("d", 1)],
+        test_input = [[("a", 2), ("b", 2), ("c", 1), ("d", 1)],
                       [("", 4), (1, 1), (2, 2), (3, 3)],
                       [(1, 1), (2, 1), (3, 1), (4, 1)]]
 
         def test_func(dstream):
             return dstream.mapValues(lambda x: x + 10)
-        expected_output = [[("a", 12), ("b", 12), ("c", 11), ("d", 11)],
+        expected_output = [[("a", 12), ("b", 12), ("c", 11), ("d", 11)],
                            [("", 14), (1, 11), (2, 12), (3, 13)],
                            [(1, 11), (2, 11), (3, 11), (4, 11)]]
         output = self._run_stream(test_input, test_func, expected_output)

@@ -250,7 +250,8 @@ def test_flatMapValues_batch(self):
 
         def test_func(dstream):
             return dstream.flatMapValues(lambda x: (x, x + 10))
-        expected_output = [[("a", 2), ("a", 12), ("b", 2), ("b", 12), ("c", 1), ("c", 11), ("d", 1), ("d", 11)],
+        expected_output = [[("a", 2), ("a", 12), ("b", 2), ("b", 12),
+                            ("c", 1), ("c", 11), ("d", 1), ("d", 11)],
                            [("", 4), ("", 14), (1, 1), (1, 11), (2, 1), (2, 11), (3, 1), (3, 11)],
                            [(1, 1), (1, 11), (2, 1), (2, 11), (3, 1), (3, 11), (4, 1), (4, 11)]]
         output = self._run_stream(test_input, test_func, expected_output)

@@ -344,7 +345,7 @@ def test_func(dstream):
 
     def test_groupByKey_batch(self):
         """Basic operation test for DStream.groupByKey with batch deserializer."""
-        test_input = [[(1, 1), (2, 1), (3, 1), (4, 1)],
+        test_input = [[(1, 1), (2, 1), (3, 1), (4, 1)],
                       [(1, 1), (1, 1), (1, 1), (2, 1), (2, 1), (3, 1)],
                       [("a", 1), ("a", 1), ("b", 1), ("", 1), ("", 1), ("", 1)]]
 

@@ -361,7 +362,7 @@ def test_func(dstream):
 
     def test_groupByKey_unbatch(self):
         """Basic operation test for DStream.groupByKey with unbatch deserializer."""
-        test_input = [[(1, 1), (2, 1), (3, 1)],
+        test_input = [[(1, 1), (2, 1), (3, 1)],
                       [(1, 1), (1, 1), ("", 1)],
                       [("a", 1), ("a", 1), ("b", 1)]]
 

@@ -378,12 +379,13 @@ def test_func(dstream):
 
     def test_combineByKey_batch(self):
         """Basic operation test for DStream.combineByKey with batch deserializer."""
-        test_input = [[(1, 1), (2, 1), (3, 1), (4, 1)],
-                      [(1, 1), (1, 1), (1, 1), (2, 1), (2, 1), (3, 1)],
+        test_input = [[(1, 1), (2, 1), (3, 1), (4, 1)],
+                      [(1, 1), (1, 1), (1, 1), (2, 1), (2, 1), (3, 1)],
                       [("a", 1), ("a", 1), ("b", 1), ("", 1), ("", 1), ("", 1)]]
 
         def test_func(dstream):
-            def add(a, b): return a + str(b)
+            def add(a, b):
+                return a + str(b)
             return dstream.combineByKey(str, add, add)
         expected_output = [[(1, "1"), (2, "1"), (3, "1"), (4, "1")],
                            [(1, "111"), (2, "11"), (3, "1")],

@@ -395,10 +397,13 @@ def add(a, b): return a + str(b)
 
     def test_combineByKey_unbatch(self):
         """Basic operation test for DStream.combineByKey with unbatch deserializer."""
-        test_input = [[(1, 1), (2, 1), (3, 1)], [(1, 1), (1, 1), ("", 1)], [("a", 1), ("a", 1), ("b", 1)]]
+        test_input = [[(1, 1), (2, 1), (3, 1)],
+                      [(1, 1), (1, 1), ("", 1)],
+                      [("a", 1), ("a", 1), ("b", 1)]]
 
         def test_func(dstream):
-            def add(a, b): return a + str(b)
+            def add(a, b):
+                return a + str(b)
             return dstream.combineByKey(str, add, add)
         expected_output = [[(1, "1"), (2, "1"), (3, "1")],
                            [(1, "11"), ("", "1")],

@@ -445,7 +450,7 @@ def _run_stream(self, test_input, test_func, expected_output, numSlices=None):
             # Check time out.
             if (current_time - start_time) > self.timeout:
                 break
-            # StreamingContext.awaitTermination is not used to wait because
+            # StreamingContext.awaitTermination is not used to wait because
             # if py4j server is called every 50 milliseconds, it gets an error.
             time.sleep(0.05)
         # Check if the output is the same length of expected output.

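Several of the keyed tests above sort both the actual and expected output before comparing, presumably because the order of (key, value) pairs within a batch is not guaranteed. A self-contained sketch of that normalization step (the helper name here is illustrative; the suite's own `_sort_result_based_on_key` is not shown in full):

    import operator

    def sort_result_based_on_key(outputs):
        # Sort each batch of (key, value) pairs in place so ordering cannot fail the test.
        for batch in outputs:
            batch.sort(key=operator.itemgetter(0))

    expected = [[("a", 2), ("b", 2)], [("", 4)], [(1, 2), (2, 2), (3, 1)]]
    actual = [[("b", 2), ("a", 2)], [("", 4)], [(3, 1), (1, 2), (2, 2)]]

    sort_result_based_on_key(expected)
    sort_result_based_on_key(actual)
    assert expected == actual
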
python/pyspark/streaming/util.py

Lines changed: 0 additions & 1 deletion
@@ -54,7 +54,6 @@ def msDurationToString(ms):
     >>> msDurationToString(3600000)
     '1.00 h'
     """
-    #TODO: add doctest
     second = 1000
     minute = 60 * second
     hour = 60 * minute

python/pyspark/worker.py

Lines changed: 1 addition & 1 deletion
@@ -58,7 +58,7 @@ def main(infile, outfile):
     SparkFiles._is_running_on_worker = True
 
     # fetch names of includes (*.zip and *.egg files) and construct PYTHONPATH
-    sys.path.append(spark_files_dir) # *.py files that were added will be copied here
+    sys.path.append(spark_files_dir)  # *.py files that were added will be copied here
     num_python_includes = read_int(infile)
     for _ in range(num_python_includes):
         filename = utf8_deserializer.loads(infile)

python/run-tests

Lines changed: 3 additions & 1 deletion
@@ -82,7 +82,9 @@ run_test "pyspark/mllib/recommendation.py"
 run_test "pyspark/mllib/regression.py"
 run_test "pyspark/mllib/tests.py"
 run_test "pyspark/mllib/util.py"
-run_test "pyspark/streaming/tests.py"
+if [ -n "$_RUN_STREAMING_TESTS" ]; then
+    run_test "pyspark/streaming/tests.py"
+fi
 
 if [[ $FAILED == 0 ]]; then
     echo -en "\033[32m" # Green

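With this change the streaming tests appear to be opt-in rather than part of the default suite: something like `_RUN_STREAMING_TESTS=1 python/run-tests` (the invocation is shown here only as an example) would exercise them, while a plain run of the script skips them.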