Skip to content

Commit eaf89d9

Browse files
committed
added correct doctest for histogram
1 parent 4916016 commit eaf89d9

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

python/pyspark/rdd.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -613,14 +613,6 @@ def sampleVariance(self):
613613
return self.stats().sampleVariance()
614614

615615
def getBuckets(self, bucketCount):
616-
"""
617-
Compute a histogram of the data using bucketCount number of buckets
618-
evenly spaced between the min and max of the RDD.
619-
620-
>>> sc.parallelize([1,49, 23, 100, 75, 50]).histogram()
621-
{(0,49):3, (50, 100):3}
622-
"""
623-
624616
#use the statscounter as a quick way of getting max and min
625617
mm_stats = self.stats()
626618
min = mm_stats.min()
@@ -634,6 +626,14 @@ def getBuckets(self, bucketCount):
634626
return buckets
635627

636628
def histogram(self, bucketCount, buckets=None):
629+
"""
630+
Compute a histogram of the data using bucketCount number of buckets
631+
evenly spaced between the min and max of the RDD.
632+
633+
>>> sc.parallelize([1,49, 23, 100, 12, 13, 20, 22, 75, 50]).histogram(3)
634+
defaultdict(<type 'int'>, {(67, inf): 2, (1, 33): 6, (34, 66): 2})
635+
"""
636+
637637
evenBuckets = False
638638
if not buckets:
639639
buckets = self.getBuckets(bucketCount)

0 commit comments

Comments
 (0)