Skip to content

Commit f0a7ebf

Browse files
committed
[SPARK-2470] PEP8 fixes to rddsampler.py
1 parent 4dd148f commit f0a7ebf

File tree

1 file changed

+12
-12
lines changed

1 file changed

+12
-12
lines changed

python/pyspark/rddsampler.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,16 @@
1818
import sys
1919
import random
2020

21+
2122
class RDDSampler(object):
2223
def __init__(self, withReplacement, fraction, seed=None):
2324
try:
2425
import numpy
2526
self._use_numpy = True
2627
except ImportError:
27-
print >> sys.stderr, "NumPy does not appear to be installed. Falling back to default random generator for sampling."
28+
print >> sys.stderr, (
29+
"NumPy does not appear to be installed. "
30+
"Falling back to default random generator for sampling.")
2831
self._use_numpy = False
2932

3033
self._seed = seed if seed is not None else random.randint(0, sys.maxint)
@@ -61,7 +64,7 @@ def getUniformSample(self, split):
6164
def getPoissonSample(self, split, mean):
6265
if not self._rand_initialized or split != self._split:
6366
self.initRandomGenerator(split)
64-
67+
6568
if self._use_numpy:
6669
return self._random.poisson(mean)
6770
else:
@@ -80,30 +83,27 @@ def getPoissonSample(self, split, mean):
8083
num_arrivals += 1
8184

8285
return (num_arrivals - 1)
83-
86+
8487
def shuffle(self, vals):
8588
if self._random is None:
8689
self.initRandomGenerator(0) # this should only ever be called on the master so
8790
# the split does not matter
88-
91+
8992
if self._use_numpy:
9093
self._random.shuffle(vals)
9194
else:
9295
self._random.shuffle(vals, self._random.random)
9396

9497
def func(self, split, iterator):
95-
if self._withReplacement:
98+
if self._withReplacement:
9699
for obj in iterator:
97-
# For large datasets, the expected number of occurrences of each element in a sample with
98-
# replacement is Poisson(frac). We use that to get a count for each element.
99-
count = self.getPoissonSample(split, mean = self._fraction)
100+
# For large datasets, the expected number of occurrences of each element in
101+
# a sample with replacement is Poisson(frac). We use that to get a count for
102+
# each element.
103+
count = self.getPoissonSample(split, mean=self._fraction)
100104
for _ in range(0, count):
101105
yield obj
102106
else:
103107
for obj in iterator:
104108
if self.getUniformSample(split) <= self._fraction:
105109
yield obj
106-
107-
108-
109-

0 commit comments

Comments
 (0)