1 file changed: 15 additions, 0 deletions

@@ -21,6 +21,7 @@
 from pyspark.serializers import PickleSerializer, BatchedSerializer, UTF8Deserializer
 from pyspark.context import SparkContext
 from pyspark.streaming.dstream import DStream
+from pyspark.streaming.duration import Duration

 from py4j.java_collections import ListConverter

@@ -107,6 +108,20 @@ def awaitTermination(self, timeout=None):
         else:
             self._jssc.awaitTermination(timeout)

+    def remember(self, duration):
+        """
+        Set each DStream in this context to remember the RDDs it generated in the last given
+        duration. DStreams remember RDDs only for a limited duration of time and release them
+        for garbage collection. This method allows the developer to specify how long to remember
+        the RDDs (if the developer wishes to query old data outside the DStream computation).
+        @param duration: pyspark.streaming.duration.Duration object.
+                         Minimum duration that each DStream should remember its RDDs.
+        """
+        if not isinstance(duration, Duration):
+            raise TypeError("Input should be pyspark.streaming.duration.Duration object")
+
+        self._jssc.remember(duration._jduration)
+
     # TODO: add storageLevel
     def socketTextStream(self, hostname, port):
         """