@@ -20,21 +20,14 @@ def __init__(self, jdstream, ssc, jrdd_deserializer):
         self.ctx = ssc._sc
         self._jrdd_deserializer = jrdd_deserializer
 
-    def generatedRDDs(self):
-        """
-        // RDDs generated, marked as private[streaming] so that testsuites can access it
-        @transient
-        """
-        pass
-
     def count(self):
         """
 
         """
         #TODO make sure count implementation, this is different from what pyspark does
-        return self.mapPartitions(lambda i: [sum(1 for _ in i)]).sum().map(lambda x: x[1])
+        return self.mapPartitions(lambda i: [sum(1 for _ in i)]).map(lambda x: (None, 1))
 
-    def sum(self):
+    def _sum(self):
         """
         """
         return self.mapPartitions(lambda x: [sum(x)]).reduce(operator.add)
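For reference, the per-partition counting idiom that the TODO above points at (and that pyspark's `RDD.count` is built on) composes a `mapPartitions` step with a sum: count the elements of each partition locally, then add the local counts together. The new `+` line still maps every partition count to `(None, 1)`, which the TODO flags as provisional. A minimal standalone sketch of the idiom, with plain lists standing in for partitions (all names here are illustrative, not part of the patch):

```python
import operator
from functools import reduce

# Plain lists stand in for the stream's partitions in this sketch.
partitions = [[10, 20], [30], []]

# mapPartitions step: each partition yields a single local count.
local_counts = [sum(1 for _ in part) for part in partitions]  # [2, 1, 0]

# reduce(operator.add) step: combine the local counts into a total,
# mirroring the _sum implementation above.
total = reduce(operator.add, local_counts)
assert total == 3
```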
@@ -65,15 +58,22 @@ def func(s, iterator): return chain.from_iterable(imap(f, iterator))
     def map(self, f, preservesPartitioning=False):
         """
         """
-        def func(split, iterator): return imap(f, iterator)
-        return PipelinedDStream(self, func, preservesPartitioning)
+        def func(iterator): return imap(f, iterator)
+        return self.mapPartitions(func)
+        #return PipelinedDStream(self, func, preservesPartitioning)
 
     def mapPartitions(self, f):
         """
         """
         def func(s, iterator): return f(iterator)
         return self.mapPartitionsWithIndex(func)
 
+    def mapPartitionsWithIndex(self, f, preservesPartitioning=False):
+        """
+
+        """
+        return PipelinedDStream(self, f, preservesPartitioning)
+
     def reduce(self, func, numPartitions=None):
         """
 
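This hunk layers the element-wise `map` on top of `mapPartitions`, which in turn delegates to the new `mapPartitionsWithIndex`, the one method that actually constructs a `PipelinedDStream`. A self-contained sketch of that layering, using a hypothetical `MiniStream` in place of `DStream` and Python 3's lazy `map` in place of the `imap` used in the diff:

```python
class MiniStream:
    """Hypothetical stand-in for DStream; a list of lists plays the partitions."""

    def __init__(self, partitions):
        self.partitions = partitions

    def mapPartitionsWithIndex(self, f):
        # f receives (partition_index, iterator) and returns an iterator,
        # mirroring the contract PipelinedDStream expects in the diff.
        return MiniStream([list(f(i, iter(p)))
                           for i, p in enumerate(self.partitions)])

    def mapPartitions(self, f):
        # Drop the partition index before calling f, as the diff does.
        def func(s, iterator):
            return f(iterator)
        return self.mapPartitionsWithIndex(func)

    def map(self, f):
        # Element-wise map expressed as a per-partition transformation.
        def func(iterator):
            return map(f, iterator)
        return self.mapPartitions(func)

assert MiniStream([[1, 2], [3]]).map(lambda x: x * 2).partitions == [[2, 4], [6]]
```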
@@ -92,8 +92,8 @@ def combineLocally(iterator):
 
                 #TODO for count operation make sure count implementation
                 # This is different from what pyspark does
-                if isinstance(x, int):
-                    x = ("", x)
+                # if isinstance(x, int):
+                #     x = ("", x)
 
                 (k, v) = x
                 if k not in combiners:
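For context, `combineLocally` is the map-side combine loop this hunk patches: each `(k, v)` pair is folded into a per-key combiner dictionary, and the now commented-out lines had coerced bare ints into `("", x)` pairs for count. A standalone sketch of that loop, assuming the inputs are already pairs (the function and argument names below are illustrative):

```python
def combine_locally(iterator, createCombiner, mergeValue):
    # Fold each (k, v) pair into a per-key combiner, as in the loop above.
    combiners = {}
    for x in iterator:
        (k, v) = x
        if k not in combiners:
            combiners[k] = createCombiner(v)
        else:
            combiners[k] = mergeValue(combiners[k], v)
    return combiners

# Word-count style usage: start with the first value, then add later ones.
pairs = [("a", 1), ("b", 1), ("a", 1)]
counts = combine_locally(pairs, createCombiner=lambda v: v,
                         mergeValue=lambda c, v: c + v)
assert counts == {"a": 2, "b": 1}
```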