Skip to content

Commit

Permalink
[SPARK-8652] [PYSPARK] Check return value for all uses of doctest.testmod()
Browse files Browse the repository at this point in the history

This patch addresses a critical issue in the PySpark tests:

Several of our Python modules' `if __name__ == "__main__":` blocks call `doctest.testmod()` in order to run doctests but forget to check and handle its return value. As a result, some PySpark test failures can go unnoticed because they will not fail the build.

Fortunately, there was only one test failure which was masked by this bug: a `pyspark.profiler` doctest was failing due to changes in RDD pipelining.

Author: Josh Rosen <joshrosen@databricks.com>

Closes apache#7032 from JoshRosen/testmod-fix and squashes the following commits:

60dbdc0 [Josh Rosen] Account for int vs. long formatting change in Python 3
8b8d80a [Josh Rosen] Fix failing test.
e6423f9 [Josh Rosen] Check return code for all uses of doctest.testmod().
  • Loading branch information
JoshRosen authored and Davies Liu committed Jun 26, 2015
1 parent 37bf76a commit 41afa16
Show file tree
Hide file tree
Showing 8 changed files with 29 additions and 12 deletions.
4 changes: 3 additions & 1 deletion dev/merge_spark_pr.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,8 @@ def main():

if __name__ == "__main__":
import doctest
doctest.testmod()
(failure_count, test_count) = doctest.testmod()
if failure_count:
exit(-1)

main()
4 changes: 3 additions & 1 deletion python/pyspark/accumulators.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,4 +264,6 @@ def _start_update_server():

if __name__ == "__main__":
import doctest
doctest.testmod()
(failure_count, test_count) = doctest.testmod()
if failure_count:
exit(-1)
4 changes: 3 additions & 1 deletion python/pyspark/broadcast.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,6 @@ def __reduce__(self):

if __name__ == "__main__":
import doctest
doctest.testmod()
(failure_count, test_count) = doctest.testmod()
if failure_count:
exit(-1)
5 changes: 3 additions & 2 deletions python/pyspark/heapq3.py
Original file line number Diff line number Diff line change
Expand Up @@ -883,6 +883,7 @@ def nlargest(n, iterable, key=None):


if __name__ == "__main__":

import doctest
print(doctest.testmod())
(failure_count, test_count) = doctest.testmod()
if failure_count:
exit(-1)
8 changes: 6 additions & 2 deletions python/pyspark/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,11 @@ class Profiler(object):
>>> sc = SparkContext('local', 'test', conf=conf, profiler_cls=MyCustomProfiler)
>>> sc.parallelize(range(1000)).map(lambda x: 2 * x).take(10)
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
>>> sc.parallelize(range(1000)).count()
1000
>>> sc.show_profiles()
My custom profiles for RDD:1
My custom profiles for RDD:2
My custom profiles for RDD:3
>>> sc.stop()
"""

Expand Down Expand Up @@ -169,4 +171,6 @@ def stats(self):

if __name__ == "__main__":
import doctest
doctest.testmod()
(failure_count, test_count) = doctest.testmod()
if failure_count:
exit(-1)
8 changes: 5 additions & 3 deletions python/pyspark/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@
>>> rdd.glom().collect()
[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]
>>> rdd._jrdd.count()
8L
>>> int(rdd._jrdd.count())
8
>>> sc.stop()
"""

Expand Down Expand Up @@ -556,4 +556,6 @@ def write_with_length(obj, stream):

if __name__ == '__main__':
import doctest
doctest.testmod()
(failure_count, test_count) = doctest.testmod()
if failure_count:
exit(-1)
4 changes: 3 additions & 1 deletion python/pyspark/shuffle.py
Original file line number Diff line number Diff line change
Expand Up @@ -838,4 +838,6 @@ def load_partition(j):

if __name__ == "__main__":
import doctest
doctest.testmod()
(failure_count, test_count) = doctest.testmod()
if failure_count:
exit(-1)
4 changes: 3 additions & 1 deletion python/pyspark/streaming/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,4 +125,6 @@ def rddToFileName(prefix, suffix, timestamp):

if __name__ == "__main__":
import doctest
doctest.testmod()
(failure_count, test_count) = doctest.testmod()
if failure_count:
exit(-1)

0 comments on commit 41afa16

Please sign in to comment.