File tree Expand file tree Collapse file tree 1 file changed +12
-3
lines changed
streaming/src/main/scala/org/apache/spark/streaming/scheduler/rate Expand file tree Collapse file tree 1 file changed +12
-3
lines changed Original file line number Diff line number Diff line change @@ -72,14 +72,23 @@ private[streaming] class PIDRateEstimator(
72
72
// in elements/second
73
73
val error = latestRate - processingRate
74
74
75
- // in elements/second
76
- val sumError = schedulingDelay.toDouble * processingRate / batchIntervalMillis
75
+ // The error integral, based on schedulingDelay as an indicator for accumulated errors
76
+ // a scheduling delay s corresponds to s * processingRate overflowing elements. Those
77
+ // are elements that couldn't be processed in previous batches, leading to this delay.
78
+ // We assume the processingRate didn't change too much.
79
+ // from the number of overflowing elements we can calculate the rate at which they would be
80
+ // processed by dividing it by the batch interval. This rate is our "historical" error,
81
+ // or integral part, since if we subtracted this rate from the previous "calculated rate",
82
+ // there wouldn't have been any overflowing elements, and the scheduling delay would have
83
+ // been zero.
84
+ // (in elements/second)
85
+ val historicalError = schedulingDelay.toDouble * processingRate / batchIntervalMillis
77
86
78
87
// in elements/(second ^ 2)
79
88
val dError = (error - latestError) / delaySinceUpdate
80
89
81
90
val newRate = (latestRate + proportional * error +
82
- integral * sumError +
91
+ integral * historicalError +
83
92
derivative * dError).max(0.0 )
84
93
latestTime = time
85
94
if (firstRun) {
You can’t perform that action at this time.
0 commit comments