[PySpark] Add blank lines to Python docstrings so example code renders correctly #1808

Status: Closed · wants to merge 1 commit
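
Why the blank line matters: docutils, the reST parser underneath Sphinx, only recognizes a ">>>" paragraph as a doctest block when a blank line separates it from the preceding text. Without one, the example lines are folded into the description paragraph and render as plain prose. Python's doctest runner finds the examples either way, so this is purely a documentation-rendering fix. A minimal before/after sketch, abridged from the keys hunk in the diff below:

# Before: no separating blank line, so the example renders as part of
# the description paragraph in the generated docs.
def keys(self):
    """
    Return an RDD with the keys of each tuple.
    >>> m = sc.parallelize([(1, 2), (3, 4)]).keys()
    >>> m.collect()
    [1, 3]
    """

# After: the blank line makes the example parse as a doctest block,
# so it renders as code.
def keys(self):
    """
    Return an RDD with the keys of each tuple.

    >>> m = sc.parallelize([(1, 2), (3, 4)]).keys()
    >>> m.collect()
    [1, 3]
    """
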
9 changes: 9 additions & 0 deletions python/pyspark/rdd.py
@@ -134,6 +134,7 @@ class MaxHeapQ(object):

"""
An implementation of MaxHeap.
+
>>> import pyspark.rdd
>>> heap = pyspark.rdd.MaxHeapQ(5)
>>> [heap.insert(i) for i in range(10)]
@@ -381,6 +382,7 @@ def mapPartitionsWithSplit(self, f, preservesPartitioning=False):
def getNumPartitions(self):
"""
Returns the number of partitions in RDD
+
>>> rdd = sc.parallelize([1, 2, 3, 4], 2)
>>> rdd.getNumPartitions()
2
@@ -570,6 +572,7 @@ def sortByKey(self, ascending=True, numPartitions=None, keyfunc=lambda x: x):
"""
Sorts this RDD, which is assumed to consist of (key, value) pairs.
# noqa
+
>>> tmp = [('a', 1), ('b', 2), ('1', 3), ('d', 4), ('2', 5)]
>>> sc.parallelize(tmp).sortByKey(True, 2).collect()
[('1', 3), ('2', 5), ('a', 1), ('b', 2), ('d', 4)]
@@ -1205,6 +1208,7 @@ def collectAsMap(self):
def keys(self):
"""
Return an RDD with the keys of each tuple.
+
>>> m = sc.parallelize([(1, 2), (3, 4)]).keys()
>>> m.collect()
[1, 3]
@@ -1214,6 +1218,7 @@ def keys(self):
def values(self):
"""
Return an RDD with the values of each tuple.
+
>>> m = sc.parallelize([(1, 2), (3, 4)]).values()
>>> m.collect()
[2, 4]
@@ -1638,6 +1643,7 @@ def repartition(self, numPartitions):
Internally, this uses a shuffle to redistribute data.
If you are decreasing the number of partitions in this RDD, consider
using `coalesce`, which can avoid performing a shuffle.
+
>>> rdd = sc.parallelize([1,2,3,4,5,6,7], 4)
>>> sorted(rdd.glom().collect())
[[1], [2, 3], [4, 5], [6, 7]]
@@ -1652,6 +1658,7 @@ def coalesce(self, numPartitions, shuffle=False):
def coalesce(self, numPartitions, shuffle=False):
"""
Return a new RDD that is reduced into `numPartitions` partitions.
+
>>> sc.parallelize([1, 2, 3, 4, 5], 3).glom().collect()
[[1], [2, 3], [4, 5]]
>>> sc.parallelize([1, 2, 3, 4, 5], 3).coalesce(1).glom().collect()
@@ -1690,6 +1697,7 @@ def name(self):
def setName(self, name):
"""
Assign a name to this RDD.
+
>>> rdd1 = sc.parallelize([1,2])
>>> rdd1.setName('RDD1')
>>> rdd1.name()
@@ -1749,6 +1757,7 @@ class PipelinedRDD(RDD):

"""
Pipelined maps:
+
>>> rdd = sc.parallelize([1, 2, 3, 4])
>>> rdd.map(lambda x: 2 * x).cache().map(lambda x: 2 * x).collect()
[4, 8, 12, 16]
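
To sanity-check a docstring without building the full docs, the text can be fed to docutils directly; a minimal sketch, assuming docutils is installed (it is the library Sphinx uses to parse reST):

# Hedged sketch: publish_string's default "pseudoxml" writer returns the
# parsed node tree as bytes; a doctest_block node appears only when a
# blank line precedes the ">>>" paragraph.
from docutils.core import publish_string

broken = "Return an RDD with the keys of each tuple.\n>>> m.collect()\n"
fixed = "Return an RDD with the keys of each tuple.\n\n>>> m.collect()\n"

print(b"doctest_block" in publish_string(broken))  # False: folded into the paragraph
print(b"doctest_block" in publish_string(fixed))   # True: parsed as a doctest block

The doctests themselves pass either way, since doctest keys on the ">>>" prompt rather than the blank line, so running Spark's python/run-tests before and after this change should show no behavioral difference.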