@@ -1,9 +1,3 @@
-<<<<<<< HEAD
-=======
-__author__ = 'ktakagiw'
-
-
->>>>>>> initial commit for pySparkStreaming
 #
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
@@ -21,7 +15,6 @@
 # limitations under the License.
 #
 
-<<<<<<< HEAD
 import sys
 from signal import signal, SIGTERM, SIGINT
 
@@ -36,43 +29,12 @@ class StreamingContext(object):
     """
     Main entry point for Spark Streaming functionality. A StreamingContext represents the
     connection to a Spark cluster, and can be used to create L{DStream}s and
-=======
-import os
-import shutil
-import sys
-from threading import Lock
-from tempfile import NamedTemporaryFile
-
-from pyspark import accumulators
-from pyspark.accumulators import Accumulator
-from pyspark.broadcast import Broadcast
-from pyspark.conf import SparkConf
-from pyspark.files import SparkFiles
-from pyspark.java_gateway import launch_gateway
-from pyspark.serializers import PickleSerializer, BatchedSerializer, UTF8Deserializer
-from pyspark.storagelevel import StorageLevel
-from pyspark.rdd import RDD
-from pyspark.context import SparkContext
-
-from py4j.java_collections import ListConverter
-
-from pyspark.streaming.dstream import DStream
-
-class StreamingContext(object):
-    """
-    Main entry point for Spark functionality. A StreamingContext represents the
-    connection to a Spark cluster, and can be used to create L{RDD}s and
->>>>>>> initial commit for pySparkStreaming
     broadcast variables on that cluster.
     """
 
     def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
                  environment=None, batchSize=1024, serializer=PickleSerializer(), conf=None,
-<<<<<<< HEAD
                  gateway=None, sparkContext=None, duration=None):
-=======
-                 gateway=None, duration=None):
->>>>>>> initial commit for pySparkStreaming
         """
         Create a new StreamingContext. At least the master and app name and duration
         should be set, either through the named parameters here or through C{conf}.
@@ -93,7 +55,6 @@ def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
         @param conf: A L{SparkConf} object setting Spark properties.
         @param gateway: Use an existing gateway and JVM, otherwise a new JVM
                will be instantiated.
-<<<<<<< HEAD
         @param sparkContext: L{SparkContext} object.
         @param duration: A L{Duration} object for SparkStreaming.
@@ -112,23 +73,13 @@ def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
         # is started in StreamingContext.
         SparkContext._gateway.restart_callback_server()
         self._clean_up_trigger()
-=======
-        @param duration: A L{Duration} Duration for SparkStreaming
-
-        """
-        # Create the Python Sparkcontext
-        self._sc = SparkContext(master=master, appName=appName, sparkHome=sparkHome,
-                                pyFiles=pyFiles, environment=environment, batchSize=batchSize,
-                                serializer=serializer, conf=conf, gateway=gateway)
->>>>>>> initial commit for pySparkStreaming
         self._jvm = self._sc._jvm
         self._jssc = self._initialize_context(self._sc._jsc, duration._jduration)
 
     # Initialize StreamingContext in a function to allow subclass-specific initialization
     def _initialize_context(self, jspark_context, jduration):
         return self._jvm.JavaStreamingContext(jspark_context, jduration)
 
-<<<<<<< HEAD
     def _clean_up_trigger(self):
         """Kill py4j callback server properly using signal lib"""
@@ -205,53 +156,3 @@ def _testInputStream(self, test_inputs, numSlices=None):
         jinput_stream = self._jvm.PythonTestInputStream(self._jssc, jtest_rdds).asJavaDStream()
 
         return DStream(jinput_stream, self, test_rdd_deserializers[0])
-=======
-    def actorStream(self, props, name, storageLevel, supervisorStrategy):
-        raise NotImplementedError
-
-    def addStreamingListener(self, streamingListener):
-        raise NotImplementedError
-
-    def awaitTermination(self, timeout=None):
-        if timeout:
-            self._jssc.awaitTermination(timeout)
-        else:
-            self._jssc.awaitTermination()
-
-    def checkpoint(self, directory):
-        raise NotImplementedError
-
-    def fileStream(self, directory, filter=None, newFilesOnly=None):
-        raise NotImplementedError
-
-    def networkStream(self, receiver):
-        raise NotImplementedError
-
-    def queueStream(self, queue, oneAtATime=True, defaultRDD=None):
-        raise NotImplementedError
-
-    def rawSocketStream(self, hostname, port, storagelevel):
-        raise NotImplementedError
-
-    def remember(self, duration):
-        raise NotImplementedError
-
-    def socketStream(hostname, port, converter, storageLevel):
-        raise NotImplementedError
-
-    def start(self):
-        self._jssc.start()
-
-    def stop(self, stopSparkContext=True):
-        raise NotImplementedError
-
-    def textFileStream(self, directory):
-        return DStream(self._jssc.textFileStream(directory), self, UTF8Deserializer())
-
-    def transform(self, seq):
-        raise NotImplementedError
-
-    def union(self, seq):
-        raise NotImplementedError
-
->>>>>>> initial commit for pySparkStreaming
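
For reviewers who want to exercise the resolved class, here is a minimal, hypothetical usage sketch. It assumes the surviving HEAD side keeps equivalents of the start, awaitTermination, and textFileStream methods that the discarded branch above implemented, and it assumes a Seconds() helper in pyspark.streaming.duration that returns a L{Duration}; neither the helper, the local[2] master, nor the input path appears in this diff.

# Hypothetical usage sketch; names marked below are assumptions, not part of this diff.
from pyspark.streaming.context import StreamingContext
from pyspark.streaming.duration import Seconds  # assumed Duration helper

# Constructor arguments mirror the __init__ signature kept by this resolution.
ssc = StreamingContext(master="local[2]", appName="StreamingSketch",
                       duration=Seconds(1))

# textFileStream is one of the few input sources implemented rather than
# raising NotImplementedError; it wraps the JVM stream in a DStream.
lines = ssc.textFileStream("/tmp/streaming-input")  # illustrative path

ssc.start()                  # starts the underlying JavaStreamingContext
ssc.awaitTermination(10000)  # timeout is forwarded to the JVM, in milliseconds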