@@ -55,6 +55,91 @@ class PythonDStream[T: ClassTag](
       case None => None
     }
   }
+<<<<<<< HEAD
 
   val asJavaDStream = JavaDStream.fromDStream(this)
 }
+=======
+  val asJavaDStream = JavaDStream.fromDStream(this)
+
+  /**
+   * Print the first ten elements of each PythonRDD generated in this PythonDStream. This is an
+   * output operator, so this PythonDStream will be registered as an output stream and
+   * materialized there. Since the serialized Python objects are only readable by Python,
+   * pyprint writes the binary data out to a temporary file and runs a Python script to
+   * deserialize and print the first ten elements.
+   */
+  private[streaming] def pyprint() {
+    def foreachFunc = (rdd: RDD[Array[Byte]], time: Time) => {
+      // take 11 elements so we can tell whether there are more than ten to print
+      val iter = rdd.take(11).iterator
+
+      // write the serialized python objects out to a temporary file
+      val prefix = "spark"
+      val suffix = ".tmp"
+      val tempFile = File.createTempFile(prefix, suffix)
+      val tempFileStream = new DataOutputStream(new FileOutputStream(tempFile.getAbsolutePath))
+      PythonRDD.writeIteratorToStream(iter, tempFileStream)
+      tempFileStream.close()
+
+      // TODO: pythonExec should be passed in from Python rather than read from the environment
+      val pythonExec = new ProcessBuilder().environment().get("PYSPARK_PYTHON")
+      val sparkHome = new ProcessBuilder().environment().get("SPARK_HOME")
+      // Note: new ProcessBuilder(Seq(...)) does not compile because java.lang.ProcessBuilder
+      // takes String varargs or a java.util.List[String], not a Scala Seq.
+      // The absolute path to the python script will need to change once the code no longer
+      // lives in the separate pysparkstreaming package.
+      val pb = new ProcessBuilder(pythonExec,
+        sparkHome + "/python/pysparkstreaming/streaming/pyprint.py", tempFile.getAbsolutePath)
+      val workerEnv = pb.environment()
+
+      // envVars also needs to be passed in from Python
+      // workerEnv.putAll(envVars)
+      val pythonPath = sparkHome + "/python/" + File.pathSeparator + workerEnv.get("PYTHONPATH")
+      workerEnv.put("PYTHONPATH", pythonPath)
+      val worker = pb.start()
+      val is = worker.getInputStream()
+      val isr = new InputStreamReader(is)
+      val br = new BufferedReader(isr)
+
+      println("-------------------------------------------")
+      println("Time: " + time)
+      println("-------------------------------------------")
+
+      // echo the python script's stdout until it is exhausted
+      var line = br.readLine()
+      while (line != null) {
+        println(line)
+        line = br.readLine()
+      }
+      // delete temporary file
+      tempFile.delete()
+      println()
+    }
+    new ForEachDStream(this, context.sparkContext.clean(foreachFunc)).register()
+  }
+}
+
+private class PairwiseDStream(prev: DStream[Array[Byte]]) extends
+  DStream[(Long, Array[Byte])](prev.ssc) {
+  override def dependencies = List(prev)
+
+  override def slideDuration: Duration = prev.slideDuration
+
+  override def compute(validTime: Time): Option[RDD[(Long, Array[Byte])]] = {
+    prev.getOrCompute(validTime) match {
+      case Some(rdd) =>
+        // wrap the byte RDD so it becomes an RDD of (Long, Array[Byte]) pairs for shuffling
+        val pairwiseRDD = new PairwiseRDD(rdd)
+        Some(pairwiseRDD.asJavaPairRDD.rdd)
+      case None => None
+    }
+  }
+  val asJavaPairDStream: JavaPairDStream[Long, Array[Byte]] = JavaPairDStream.fromPairDStream(this)
+}
+
+>>>>>>> added reducedByKey not working yet
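
Note: the `pyprint.py` script invoked above is not part of this diff. As a rough sketch of what it has to do, assuming `PythonRDD.writeIteratorToStream` frames each element as a 4-byte big-endian length followed by the pickled bytes (the framing `java.io.DataOutputStream.writeInt` produces), the script would read the temporary file back roughly like this:

```python
# pyprint_sketch.py -- hypothetical illustration; the real pyprint.py is not in this diff
import struct
import sys
import cPickle as pickle

def read_records(path):
    # assumes each record is a 4-byte big-endian length followed by pickled bytes
    with open(path, "rb") as f:
        while True:
            header = f.read(4)
            if len(header) < 4:
                return
            (length,) = struct.unpack(">i", header)
            yield pickle.loads(f.read(length))

if __name__ == "__main__":
    records = list(read_records(sys.argv[1]))
    for obj in records[:10]:
        print obj
    # the Scala side took 11 elements so that an 11th signals "more than ten"
    if len(records) > 10:
        print "..."
```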
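For the reduceByKey work this commit gestures at, `PairwiseDStream` mirrors the batch-side `PairwiseRDD`: the Python worker is expected to emit records in alternating pairs, a packed long (the key's hash, used only for partitioning) followed by the pickled `(key, value)` item, which the Scala side reassembles into `(Long, Array[Byte])` tuples. A sketch of the Python half, modeled on `add_shuffle_key` in PySpark's `rdd.py` (`pack_long` comes from `pyspark.serializers`; the builtin `hash` stands in here for PySpark's portable hash):

```python
# sketch of the byte-pairing convention PairwiseDStream consumes; illustrative only
import cPickle as pickle
from pyspark.serializers import pack_long  # struct.pack("!q", value)

def add_shuffle_key(iterator):
    for key, value in iterator:
        yield pack_long(hash(key))           # becomes the Long on the Scala side
        yield pickle.dumps((key, value), 2)  # becomes the Array[Byte] payload
```

Shuffling on the numeric key lets Spark partition the stream without ever deserializing the Python objects, which is exactly what a Python-side reduceByKey needs.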