@@ -55,91 +55,34 @@ class PythonDStream[T: ClassTag](
55
55
case None => None
56
56
}
57
57
}
58
- <<<<<<< HEAD
59
58
60
59
val asJavaDStream = JavaDStream .fromDStream(this )
61
60
}
62
- =======
63
- val asJavaDStream = JavaDStream .fromDStream(this )
64
-
65
- /**
66
- * Print the first ten elements of each PythonRDD generated in this PythonDStream. This is an output
67
- * operator, so this PythonDStream will be registered as an output stream and there materialized.
68
- * Since serialized Python object is readable by Python, pyprint writes out binary data to
69
- * temporary file and run python script to deserialized and print the first ten elements
70
- */
71
- private [streaming] def ppyprint () {
72
- def foreachFunc = (rdd : RDD [Array [Byte ]], time : Time ) => {
73
- val iter = rdd.take(11 ).iterator
74
-
75
- // make a temporary file
76
- val prefix = " spark"
77
- val suffix = " .tmp"
78
- val tempFile = File .createTempFile(prefix, suffix)
79
- val tempFileStream = new DataOutputStream (new FileOutputStream (tempFile.getAbsolutePath))
80
- // write out serialized python object
81
- PythonRDD .writeIteratorToStream(iter, tempFileStream)
82
- tempFileStream.close()
83
-
84
- // This value has to be passed from python
85
- // val pythonExec = new ProcessBuilder().environment().get("PYSPARK_PYTHON")
86
- val sparkHome = new ProcessBuilder ().environment().get(" SPARK_HOME" )
87
- // val pb = new ProcessBuilder(Seq(pythonExec, sparkHome + "/python/pyspark/streaming/pyprint.py", tempFile.getAbsolutePath())) // why this fails to compile???
88
- // absolute path to the python script is needed to change because we do not use pysparkstreaming
89
- val pb = new ProcessBuilder (pythonExec, sparkHome + " /python/pysparkstreaming/streaming/pyprint.py" , tempFile.getAbsolutePath)
90
- val workerEnv = pb.environment()
91
-
92
- // envVars also need to be pass
93
- // workerEnv.putAll(envVars)
94
- val pythonPath = sparkHome + " /python/" + File .pathSeparator + workerEnv.get(" PYTHONPATH" )
95
- workerEnv.put(" PYTHONPATH" , pythonPath)
96
- val worker = pb.start()
97
- val is = worker.getInputStream()
98
- val isr = new InputStreamReader (is)
99
- val br = new BufferedReader (isr)
100
61
101
- println (" -------------------------------------------" )
102
- println (" Time: " + time)
103
- println (" -------------------------------------------" )
104
62
105
- // print value from python std out
106
- var line = " "
107
- breakable {
108
- while (true ) {
109
- line = br.readLine()
110
- if (line == null ) break()
111
- println(line)
112
- }
113
- }
114
- // delete temporary file
115
- tempFile.delete()
116
- println()
117
-
118
- }
119
- new ForEachDStream (this , context.sparkContext.clean(foreachFunc)).register()
120
- }
121
- }
122
-
123
-
124
- private class PairwiseDStream (prev: DStream [Array [Byte ]]) extends
125
- DStream [(Long , Array [Byte ])](prev.ssc){
63
+ private class PairwiseDStream (prev: DStream [Array [Byte ]], partitioner : Partitioner ) extends
64
+ DStream [Array [Byte ]](prev.ssc){
126
65
override def dependencies = List (prev)
127
66
128
67
override def slideDuration : Duration = prev.slideDuration
129
68
130
- override def compute (validTime: Time ): Option [RDD [( Long , Array [Byte ]) ]]= {
69
+ override def compute (validTime: Time ): Option [RDD [Array [Byte ]]]= {
131
70
prev.getOrCompute(validTime) match {
132
71
case Some (rdd)=> Some (rdd)
133
72
val pairwiseRDD = new PairwiseRDD (rdd)
134
- Some (pairwiseRDD.asJavaPairRDD.rdd)
73
+ /*
74
+ * This is equivalent to following python code
75
+ * with _JavaStackTrace(self.context) as st:
76
+ * pairRDD = self.ctx._jvm.PairwiseRDD(keyed._jrdd.rdd()).asJavaPairRDD()
77
+ * partitioner = self.ctx._jvm.PythonPartitioner(numPartitions,
78
+ * id(partitionFunc))
79
+ * jrdd = pairRDD.partitionBy(partitioner).values()
80
+ * rdd = RDD(jrdd, self.ctx, BatchedSerializer(outputSerializer))
81
+ */
82
+ Some (pairwiseRDD.asJavaPairRDD.partitionBy(partitioner).values().rdd)
135
83
case None => None
136
84
}
137
85
}
138
- val asJavaPairDStream : JavaPairDStream [Long , Array [Byte ]] = JavaPairDStream .fromJavaDStream(this )
86
+ val asJavaDStream = JavaDStream .fromDStream(this )
87
+ // val asJavaPairDStream : JavaPairDStream[Long, Array[Byte]] = JavaPairDStream.fromJavaDStream(this)
139
88
}
140
-
141
-
142
-
143
-
144
-
145
- >>>>>>> added reducedByKey not working yet
0 commit comments