import unittest

from pyspark.context import SparkContext
+from pyspark.conf import SparkConf
from pyspark.streaming.context import StreamingContext
from pyspark.streaming.duration import *

@@ -47,8 +48,6 @@ def tearDown(self):
        # we do not wait to shutdown py4j client.
        self.ssc._jssc.stop()
        self.ssc._sc.stop()
-        # Why does it long time to terminate StremaingContext and SparkContext?
-        # Should we change the sleep time if this depends on machine spec?
        time.sleep(1)

    @classmethod
@@ -455,6 +454,71 @@ def _run_stream(self, test_input, test_func, expected_output, numSlices=None):

        return result

+
+class TestStreamingContextSuite(unittest.TestCase):
+    """
+    Should we have a conf property in SparkContext?
+    @property
+    def conf(self):
+        return self._conf
+
+    """
+    def setUp(self):
+        self.master = "local[2]"
+        self.appName = self.__class__.__name__
+        self.batchDuration = Milliseconds(500)
+        self.sparkHome = "SomeDir"
+        self.envPair = {"key": "value"}
+
+    def tearDown(self):
+        # Do not call pyspark.streaming.context.StreamingContext.stop directly because
+        # we do not want to shut down the py4j client.
+        self.ssc._jssc.stop()
+        self.ssc._sc.stop()
+        # Why does it take so long to terminate StreamingContext and SparkContext?
+        # Should we change the sleep time if this depends on machine spec?
+        time.sleep(1)
+
+    @classmethod
+    def tearDownClass(cls):
+        # Make sure to shut down the callback server
+        SparkContext._gateway._shutdown_callback_server()
+
+    def test_from_no_conf_constructor(self):
+        self.ssc = StreamingContext(master=self.master, appName=self.appName,
+                                    duration=self.batchDuration)
+        # The master could also be read via ssc.sparkContext.master;
+        # this is kept close to the Scala code.
+        self.assertEqual(self.ssc.sparkContext._conf.get("spark.master"), self.master)
+        self.assertEqual(self.ssc.sparkContext._conf.get("spark.app.name"), self.appName)
+
+    def test_from_no_conf_plus_spark_home(self):
+        self.ssc = StreamingContext(master=self.master, appName=self.appName,
+                                    sparkHome=self.sparkHome, duration=self.batchDuration)
+        self.assertEqual(self.ssc.sparkContext._conf.get("spark.home"), self.sparkHome)
+
+    def test_from_existing_spark_context(self):
+        sc = SparkContext(master=self.master, appName=self.appName)
+        self.ssc = StreamingContext(sparkContext=sc)
+
+    def test_existing_spark_context_with_settings(self):
+        conf = SparkConf()
+        conf.set("spark.cleaner.ttl", "10")
+        sc = SparkContext(master=self.master, appName=self.appName, conf=conf)
+        self.ssc = StreamingContext(sparkContext=sc)
+        self.assertEqual(int(self.ssc.sparkContext._conf.get("spark.cleaner.ttl")), 10)
+
+    def _addInputStream(self, s):
+        test_inputs = map(lambda x: range(1, x), range(5, 101))
+        # make sure numSlices is 2 due to a deserializer problem in pyspark
+        s._testInputStream(test_inputs, 2)
+
+
if __name__ == "__main__":
    unittest.main()
    SparkContext._gateway._shutdown_callback_server()
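
Reviewer note (not part of the patch): the new suite can be exercised on its own with the standard unittest loader. This is a minimal sketch; the dotted module path "pyspark.streaming.tests" is an assumption about where this file lives and should be adjusted to the actual test module.

    import unittest

    # Load and run only the TestStreamingContextSuite added in this patch.
    suite = unittest.TestLoader().loadTestsFromName(
        "pyspark.streaming.tests.TestStreamingContextSuite")
    unittest.TextTestRunner(verbosity=2).run(suite)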