@@ -4,6 +4,8 @@ import java.util.concurrent.TimeUnit
 
 import io.sqooba.oss.timeseries.bucketing.TimeBucketer
 import io.sqooba.oss.timeseries.immutable._
+import io.sqooba.oss.timeseries.window.WindowSlider.window
+import io.sqooba.oss.timeseries.window.{TimeAwareReversibleAggregator, TimeUnawareReversibleAggregator, WindowSlider}
 
 import scala.annotation.tailrec
 import scala.collection.mutable
@@ -92,37 +94,48 @@ trait TimeSeries[+T] {
   def isCompressed: Boolean
 
   /** Map the values within the time series.
-    * the 'compress' parameters allows callers to control whether or not compression should occur.
-    * If set to false, timestamps and validities remain unchanged. Defaults to true */
-  def map[O: WeakTypeTag](f: T => O, compress: Boolean = true): TimeSeries[O]
+    *
+    * @param compress controls whether or not compression should occur on the output series.
+    *                 If set to false, timestamps and validities remain unchanged. Defaults to true.
+    */
+  def map[O: WeakTypeTag](f: T => O, compress: Boolean = true): TimeSeries[O] =
+    mapWithTime[O]((_, value) => f(value), compress)
 
-  /** Map the values within the time series.
-    * Timestamps and validities of entries remain unchanged,
-    * but the time is made available for cases where the new value would depend on it. */
+  /** Map the values within the time series. If not compressing, the timestamps and
+    * validities of entries remain unchanged, but the time is made available for
+    * cases where the new value would depend on it.
+    *
+    * @param compress controls whether or not compression should occur on the output series.
+    *                 Defaults to true.
+    */
   def mapWithTime[O: WeakTypeTag](f: (Long, T) => O, compress: Boolean = true): TimeSeries[O]
 
   /** Return a time series that will only contain entries for which the passed predicate returned True. */
   def filter(predicate: TSEntry[T] => Boolean): TimeSeries[T]
 
   /** Return a time series that will only contain entries containing values for which the passed predicate returned True. */
-  def filterValues(predicate: T => Boolean): TimeSeries[T]
+  def filterValues(predicate: T => Boolean): TimeSeries[T] =
+    filter(tse => predicate(tse.value))
 
   /** Fill the wholes in the definition domain of this time series with the passed value.
     * The resulting time series will have a single continuous definition domain,
-    * provided the original time series was non-empty. */
-  def fill[U >: T](whenUndef: U): TimeSeries[U]
+    * provided the original time series was non-empty.
+    */
+  def fill[U >: T](whenUndef: U): TimeSeries[U] = {
+    val (start, end) = (this.head.timestamp, this.last.definedUntil)
+    this.fallback(TSEntry(start, whenUndef, end - start))
+  }
 
   /** Return a Seq of the TSEntries representing this time series. */
   def entries: Seq[TSEntry[T]]
 
   /** Return a Seq of the values contained by this series, in their chronological order. */
-  def values: Seq[T] =
-    entries.map(_.value)
+  def values: Seq[T] = entries.map(_.value)
 
   /** Return the first (chronological) entry in this time series.
     *
    * @throws NoSuchElementException if this time series is empty. */
-  def head: TSEntry[T]
+  def head: TSEntry[T] = headOption.get
 
   /** Return a filled option containing the first (chronological) entry in this
     * time series.
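
This hunk turns several former abstract members (map, filterValues, fill, head) into concrete defaults on the trait. A minimal sketch of how they compose, assuming the constructors used elsewhere in this diff (TSEntry(timestamp, value, duration) and TimeSeries.ofOrderedEntriesUnsafe) and the package paths shown in the imports:

    import io.sqooba.oss.timeseries.TimeSeries
    import io.sqooba.oss.timeseries.immutable.TSEntry

    // 42 valid on [0, 10), 51 valid on [20, 30): one hole at [10, 20).
    val ts = TimeSeries.ofOrderedEntriesUnsafe(
      Seq(TSEntry(0L, 42, 10L), TSEntry(20L, 51, 10L))
    )

    ts.map(_ * 2)           // delegates to mapWithTime, ignoring the timestamp
    ts.filterValues(_ > 50) // delegates to filter on each entry's value
    ts.fill(0)              // one fallback entry spanning [0, 30) plugs the hole
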
@@ -142,7 +155,7 @@ trait TimeSeries[+T] {
   /** Return the last (chronological) entry in this time series.
     *
    * @throws NoSuchElementException if this time series is empty. */
-  def last: TSEntry[T]
+  def last: TSEntry[T] = lastOption.get
 
   /** Return a filled option containing the last (chronological) entry in this
     * time series.
@@ -338,6 +351,55 @@ trait TimeSeries[+T] {
       .foldLeft(newBuilder[Double]())(_ += _)
       .result()
 
+  /** Slides a window of size 'windowWidth' over the entries present in this series.
+    * It calculates some aggregate on each window that does not depend on the time of
+    * validity of the entries. As the given aggregator is reversible, this can be done
+    * efficiently.
+    *
+    * Each returned entry E is calculated from the entries of the original time
+    * series that intersect with any window that ends in the domain of E.
+    *
+    * @note The difference between [[rollup()]] and [[slidingWindow()]] is that
+    *       rollup generates disjoint slices of the time series and aggregates over those,
+    *       whereas for sliding window an entry can be part of multiple windows.
+    *
+    * @param windowWidth width of the window
+    * @param aggregator a reversible aggregator to efficiently compute aggregations over the window
+    * @return a new series containing all the aggregates as entries
+    */
+  def slidingWindow[U >: T, A](
+      windowWidth: Long,
+      aggregator: TimeUnawareReversibleAggregator[U, A]
+  ): TimeSeries[A] =
+    aggregateStreamToSeries(
+      WindowSlider.window(this.entries.toStream, windowWidth, aggregator)
+    )
+
+  /** See [[slidingWindow()]]. This function slides a window and uses a time-aware
+    * aggregator, i.e. the aggregated values can depend on the duration of validity
+    * of each entry (example: average weighted by time of validity). Therefore it
+    * samples the entries first.
+    *
+    * @param sampleRate to resample the entries
+    * @param useClosestInWindow whether to sample strictly or not (see [[TimeSeries.sample()]])
+    */
+  def slidingWindow[U >: T, A](
+      windowWidth: Long,
+      aggregator: TimeAwareReversibleAggregator[U, A],
+      sampleRate: Long,
+      useClosestInWindow: Boolean = true
+  ): TimeSeries[A] =
+    aggregateStreamToSeries(
+      WindowSlider.window(this.entries.toStream, windowWidth, aggregator, sampleRate, useClosestInWindow)
+    )
+
+  private def aggregateStreamToSeries[A](seq: Seq[(TSEntry[_], Option[A])]): TimeSeries[A] =
+    seq.flatMap {
+      // Drop the content of the window, just keep the aggregator's result.
+      case (entry, aggregateOpt) => aggregateOpt.map(a => entry.map(_ => a))
+    }.foldLeft(newBuilder[A]())(_ += _)
+      .result()
+
   /** Sample this TimeSeries at fixed time intervals of length sampleRate starting at
     * the start timestamp. By default, all resulting entries will have the duration
     * of sampleRate. If equal contiguous entries are compressed (set the compress flag)
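
Both slidingWindow overloads delegate to WindowSlider.window and then strip the window contents, keeping only the aggregate carried by each returned entry. A hedged usage sketch, reusing 'ts' from the earlier sketch; 'sumAggregator' stands in for a concrete TimeUnawareReversibleAggregator[Int, Int], since this diff does not show which implementations the window package ships:

    // Hypothetical aggregator instance (implementation not shown in this diff).
    val sumAggregator: TimeUnawareReversibleAggregator[Int, Int] = ???

    // Each resulting entry aggregates the original entries intersecting any
    // window of width 100 that ends within that entry's domain.
    val sums: TimeSeries[Int] = ts.slidingWindow(100L, sumAggregator)

    // The time-aware overload resamples at sampleRate first, so durations of
    // validity can weigh into the aggregate (e.g. a time-weighted average).
    // val weighted = ts.slidingWindow(100L, timeWeightedAvg, sampleRate = 10L)
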
@@ -372,17 +434,21 @@ trait TimeSeries[+T] {
     * will generate buckets with domain (([a, b[), ([b, c[), ...)
     * Note that it is wise to have 'buckets' start at a meaningfully close point in time
     * relative to the time series first entry.
-    * @return a stream of (bucket-start, timeseries).
+    * @return a stream of (bucket-start, time series).
     */
   def bucket(buckets: Stream[Long]): Stream[(Long, TimeSeries[T])] =
     TimeBucketer.bucketEntriesToTimeSeries(buckets, this.entries, newBuilder[T]())
 
-  /**
-    * Given the passed bucket delimiters, apply 'aggregator' for each generated bucket.
+  /** Given the passed bucket delimiters, apply 'aggregator' for each generated bucket.
+    *
+    * Note that the timestamps and validities of the entries present in the returned
+    * time series are ONLY driven by the boundaries generated by 'buckets': the first
+    * and last entry may well be defined outside of the domain of definition of this
+    * time series.
     *
-    * Note that the timestamps and validities of the entries present in the returned timeseries
-    * are ONLY driven by the boundaries generated by 'buckets': the first and last entry
-    * may well be defined outside of the domain of definition of this time series
+    * @note The difference between [[rollup()]] and [[slidingWindow()]] is that
+    *       rollup generates disjoint slices of the time series and aggregates over those,
+    *       whereas for sliding window an entry can be part of multiple windows.
     *
     * @param buckets a stream generating the bucket boundaries for the rollup/aggregation
     * @param aggregator a function that computes an aggregate over a time series
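
The @note above is the key contract: rollup aggregates each disjoint bucket exactly once, while slidingWindow lets one entry contribute to several windows. A sketch of the bucketing side, again reusing 'ts'; the rollup call is left commented out because only its @param docs, not its full signature, appear in this diff:

    // Bucket boundaries at 0, 100, 200, ... time units.
    val boundaries: Stream[Long] = Stream.iterate(0L)(_ + 100L)

    // bucket() slices the series into (bucket-start, sub-series) pairs.
    val slices: Stream[(Long, TimeSeries[Int])] = ts.bucket(boundaries)

    // Assumed from the docs: an aggregation function of type TimeSeries[Int] => A.
    // val maxima = ts.rollup(boundaries, (slice: TimeSeries[Int]) => slice.values.max)
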
@@ -454,64 +520,7 @@ object TimeSeries {
     * The result will be properly fitted and compressed as well.
     */
   def fillGaps[T](in: Seq[TSEntry[T]], fillValue: T): Seq[TSEntry[T]] =
-    if (in.size < 2) {
-      in
-    } else {
-      fillMe(in, fillValue, Seq.newBuilder[TSEntry[T]])
-    }
-
-  @tailrec
-  private def fillMe[T](in: Seq[TSEntry[T]], fillValue: T, acc: mutable.Builder[TSEntry[T], Seq[TSEntry[T]]]): Seq[TSEntry[T]] =
-    in match {
-      case Seq(first, last) =>
-        // Only two elements remaining: the recursion can end
-        (acc ++= fillAndCompress(first, last, fillValue)).result()
-      case Seq(first, second, tail @ _*) =>
-        // Fill the gap, and check the result
-        fillAndCompress(first, second, fillValue) match {
-          // the above may return 1, 2 or 3 entries,
-          // of which the last one must not yet
-          // be added to the accumulator,
-          // instead it is prepended to what is passed to the recursive call
-          case Seq(compressed) =>
-            // Nothing to add to acc:
-            // compressed may still be extended by the next filler
-            fillMe(compressed +: tail, fillValue, acc)
-          case Seq(one, two) =>
-            // The fill value either extended 'first' or advanced 'second:
-            // we don't need to know and just add first to acc
-            fillMe(two +: tail, fillValue, acc += one)
-          case Seq(_, filler, _) =>
-            // the fill value did not extend the first,
-            // and did not advance the second
-            // first and filler are added to the accumulator
-            fillMe(second +: tail, fillValue, acc ++= Seq(first, filler))
-        }
-    }
-
-  /** Returns a Sequence of entries such that there is no discontinuity
-    * between current.timestamp and next.definedUntil, filling the gap
-    * between the entries and compression them if necessary. */
-  def fillAndCompress[T](first: TSEntry[T], second: TSEntry[T], fillValue: T): Seq[TSEntry[T]] = {
-    if (first.definedUntil == second.timestamp) {
-      // Entries contiguous.
-      Seq(first, second)
-    } else {
-      // There is space to fill
-      first.appendEntry(
-        TSEntry(first.definedUntil, fillValue, second.timestamp - first.definedUntil)
-      ) match {
-        case Seq(single) =>
-          // 'first' was extended.
-          // // Check if 'second' can be compressed into the result
-          single.appendEntry(second)
-        case Seq(notExtended, filler) =>
-          // 'first' was not extended.
-          // Check if 'second' can be advanced with the filling value
-          notExtended +: filler.appendEntry(second)
-      }
-    }
-  }
+    TimeSeries.ofOrderedEntriesUnsafe(in).fill(fillValue).entries
 
   /** @see [[TimeSeriesMerger.mergeEntries]] */
   def mergeEntries[A, B, C](a: Seq[TSEntry[A]])(b: Seq[TSEntry[B]])(op: (Option[A], Option[B]) => Option[C]): Seq[TSEntry[C]] =
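
fillGaps now simply lifts the entries into a series, fills it, and unwraps, shrinking roughly sixty lines of recursion into one. A small sketch of the expected behaviour (values arbitrary), assuming 'in' satisfies the ordered, non-overlapping precondition implied by ofOrderedEntriesUnsafe:

    // "a" on [0, 5) and "b" on [10, 15): the hole [5, 10) gets the fill value.
    val filled = TimeSeries.fillGaps(Seq(TSEntry(0L, "a", 5L), TSEntry(10L, "b", 5L)), "-")
    // expected: Seq(TSEntry(0, "a", 5), TSEntry(5, "-", 5), TSEntry(10, "b", 5))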