Sqooba
diff --git a/‎src/main/scala/io/sqooba/oss/timeseries/NumericTimeSeries.scala
+2-2 b/‎src/main/scala/io/sqooba/oss/timeseries/NumericTimeSeries.scala
+2-2
diff --git a/‎src/main/scala/io/sqooba/oss/timeseries/archive/package.scala
+1-1 b/‎src/main/scala/io/sqooba/oss/timeseries/archive/package.scala
+1-1
diff --git a/‎src/main/scala/io/sqooba/oss/timeseries/immutable/ColumnTimeSeries.scala
-13 b/‎src/main/scala/io/sqooba/oss/timeseries/immutable/ColumnTimeSeries.scala
-13
diff --git a/‎src/main/scala/io/sqooba/oss/timeseries/window/Aggregator.scala
+88 b/‎src/main/scala/io/sqooba/oss/timeseries/window/Aggregator.scala
+88
diff --git a/‎src/main/scala/io/sqooba/oss/timeseries/window/BasicAggregators.scala
+136 b/‎src/main/scala/io/sqooba/oss/timeseries/window/BasicAggregators.scala
+136
diff --git a/‎src/main/scala/io/sqooba/oss/timeseries/window/IntegralAggregator.scala
+33 b/‎src/main/scala/io/sqooba/oss/timeseries/window/IntegralAggregator.scala
+33
diff --git a/‎src/main/scala/io/sqooba/oss/timeseries/window/ReversibleAggregator.scala
+80 b/‎src/main/scala/io/sqooba/oss/timeseries/window/ReversibleAggregator.scala
+80
@@ -3,7 +3,7 @@ package io.sqooba.oss.timeseries
 import java.util.concurrent.TimeUnit
 
 import io.sqooba.oss.timeseries.immutable.TSEntry
-import io.sqooba.oss.timeseries.windowing.{IntegratingAggregator, WindowSlider}
+import io.sqooba.oss.timeseries.window.{Aggregator, IntegralAggregator, WindowSlider}
 
 import scala.annotation.tailrec
 import scala.collection.mutable.Builder
@@ -102,7 +102,7 @@ object NumericTimeSeries {
       .window(
         entries.toStream,
         window,
-        new IntegratingAggregator[T](timeUnit)
+        Aggregator.integral[T](timeUnit)
       )
       .map {
         // Drop the content of the window, just keep the integral's result.
 
@@ -6,7 +6,7 @@ import fi.iki.yak.ts.compression.gorilla.{GorillaDecompressor, LongArrayInput}
 
 import scala.util.Try
 
-/** Provides abstraction and tools for compressing/archiving timeseries data. The
+/** Provides abstraction and tools for compressing/archiving time series data. The
   * compression used is Gorilla TSC encoding implemented by the Java library
   * [[fi.iki.yak.ts.compression.gorilla]].
   *
 
@@ -222,19 +222,6 @@ case class ColumnTimeSeries[+T] private (
       }
     }
 
-  override def slidingIntegral[U >: T](
-      window: Long,
-      timeUnit: TimeUnit = TimeUnit.MILLISECONDS
-  )(implicit n: Numeric[U]): TimeSeries[Double] =
-    if (this.size < 2) {
-      this.map[Double](n.toDouble)
-    } else {
-      // TODO: have slidingSum return compressed output so we can use the unsafe constructor and save an iteration.
-      // TODO: don't use entries but directly operate on the column vectors.
-      ColumnTimeSeries
-        .ofOrderedEntriesSafe(NumericTimeSeries.slidingIntegral[U](this.entries, window, timeUnit))
-    }
-
   def looseDomain: TimeDomain = ContiguousTimeDomain(timestamps.head, timestamps.last + validities.last)
 
   lazy val supportRatio: Double = validities.sum.toDouble / looseDomain.size
 
@@ -0,0 +1,88 @@
+package io.sqooba.oss.timeseries.window
+
+import io.sqooba.oss.timeseries.immutable.TSEntry
+
+import scala.collection.immutable.Queue
+import scala.concurrent.duration.TimeUnit
+
+/** Tooling and state wrapper to efficiently compute aggregates over sliding windows,
+  * in contexts where this is possible. Instead of aggregating huge slices of a time
+  * series, an implementation iteratively updates the aggregated value when an entry
+  * is added. In the case of addition, for example, this allows storing only the
+  * summed value instead of all the entries.
+  *
+  * The aggregator will be applied sequentially, so it may keep track of any state
+  * from one entry to the next.
+  *
+  * @tparam T the type of the entries being aggregated over
+  * @tparam A the type of the aggregated value
+  */
+trait Aggregator[T, A] {
+
+  /** @return the current aggregated value or None */
+  def currentValue: Option[A]
+
+  /** Update the internal aggregated value according to the entry that is about
+    * to be added to the window.
+    *
+    * @note By default this ignores the currentWindow and passes the entry to
+    *       the function that only takes the entry. If you want to use the
+    *       entire window in the aggregation you can override this method.
+    *
+    * @param e             the entry that is about to enter the window
+    * @param currentWindow the current content of the window: it does not
+    *                      include 'e' at this point.
+    */
+  // TODO: consider returning the resulting aggregated value?
+  def addEntry(e: TSEntry[T], currentWindow: Queue[TSEntry[T]]): Unit =
+    addEntry(e)
+
+  /** Update the internal aggregated value according to the entry that is about
+    * to be added to the window.
+    *
+    * @param e the entry that is about to enter the window
+    */
+  // TODO: consider returning the resulting aggregated value?
+  def addEntry(e: TSEntry[T]): Unit
+}
+
+object Aggregator {
+
+  /** Factory for aggregators that need to act on the entire window, like median.
+    *
+    * The returned aggregator applies `f` to the values currently held in its
+    * internal queue every time `currentValue` is read.
+    *
+    * @note Aggregating this way is a lot less efficient for computations that only
+    * need little intermediary state. Rather define your own Aggregator for those
+    * cases (see for example [[SumAggregator]], [[MinAggregator]]).
+    *
+    * @param f aggregation function from a sequence of values to an option
+    * @return a reversible aggregator
+    */
+  def queueAggregator[T, A](f: Seq[T] => Option[A]): QueueAggregator[T, A] =
+    new QueueAggregator[T, A] {
+      override def currentValue: Option[A] = f(queue.toSeq)
+    }
+
+  /** See [[SumAggregator]] */
+  def sum[T: Numeric]: SumAggregator[T] = new SumAggregator[T]()
+
+  /** See [[MeanAggregator]] */
+  def mean[T: Numeric]: MeanAggregator[T] = new MeanAggregator[T]()
+
+  /** Aggregator that returns the minimum of all values in the window.
+    * See [[MinAggregator]].
+    */
+  def min[T: Ordering]: TimeUnawareReversibleAggregator[T, T] = new MinAggregator[T]()
+
+  /** Aggregator that returns the maximum of all values in the window.
+    * Implemented as a [[MinAggregator]] that is given the reversed ordering.
+    */
+  def max[T](implicit ordering: Ordering[T]): TimeUnawareReversibleAggregator[T, T] =
+    new MinAggregator[T]()(ordering.reverse)
+
+  /** See [[StdAggregator]] */
+  def std[T: Numeric]: StdAggregator[T] = new StdAggregator[T]()
+
+  /** See [[IntegralAggregator]] */
+  def integral[T: Numeric](timeunit: TimeUnit, initialValue: Double = .0): IntegralAggregator[T] =
+    new IntegralAggregator[T](timeunit, initialValue)
+}
@@ -0,0 +1,136 @@
+package io.sqooba.oss.timeseries.window
+
+import io.sqooba.oss.timeseries.immutable.TSEntry
+
+import scala.collection.mutable
+
+/** An aggregator that does strictly nothing. Used for window creation without aggregation.
+  *
+  * `currentValue` is always `None`, and both `addEntry` and `dropEntry` ignore
+  * their argument and keep no state.
+  */
+object DoNothingAggregator extends TimeUnawareReversibleAggregator[Nothing, Nothing] {
+
+  def currentValue: Option[Nothing] = None
+
+  def addEntry(entry: TSEntry[Nothing]): Unit = ()
+
+  def dropEntry(entry: TSEntry[Nothing]): Unit = ()
+}
+
+/** Reversible aggregator maintaining the total of the values carried by every
+  * entry that is at least partially within the window's domain.
+  *
+  * Only the values are looked at: gaps in the domain of definition between
+  * entries have no influence on the result.
+  */
+class SumAggregator[T](implicit n: Numeric[T]) extends TimeUnawareReversibleAggregator[T, T] {
+
+  // Running total of the values added and not yet dropped; starts at the numeric zero.
+  private var total: T = n.zero
+
+  /** @return the running total, always defined */
+  def currentValue: Option[T] = Some(total)
+
+  /** Adds the value of the entering entry to the running total. */
+  def addEntry(entry: TSEntry[T]): Unit = {
+    total = n.plus(total, entry.value)
+  }
+
+  /** Subtracts the value of the leaving entry from the running total. */
+  def dropEntry(entry: TSEntry[T]): Unit = {
+    total = n.minus(total, entry.value)
+  }
+
+}
+
+/** Reversible aggregator computing the mean of the values in the window, where
+  * each value is weighted by the validity of its entry. Being time-aware, it
+  * needs the entries to be contained in the window.
+  */
+class MeanAggregator[T](implicit n: Numeric[T]) extends TimeAwareReversibleAggregator[T, Double] {
+
+  // Running sum of value_i * validity_i
+  private var weightedSum: Double = .0
+  // Running sum of validity_i
+  private var totalValidity: Long = 0
+
+  /** @return the weighted sum divided by the summed validities, or None while
+    *         the summed validities are not strictly positive
+    */
+  def currentValue: Option[Double] =
+    if (totalValidity <= 0) None
+    else Some(weightedSum / totalValidity)
+
+  /** Accounts for the weighted value and the validity of the entering entry. */
+  def addEntry(entry: TSEntry[T]): Unit = {
+    weightedSum += n.toDouble(entry.value) * entry.validity
+    totalValidity += entry.validity
+  }
+
+  /** Removes the weighted value and the validity of the leaving entry. */
+  def dropEntry(entry: TSEntry[T]): Unit = {
+    weightedSum -= n.toDouble(entry.value) * entry.validity
+    totalValidity -= entry.validity
+  }
+}
+
+/** Reversible aggregator that calculates the (biased) standard deviation (i.e.
+  * the square root of the variance) of the values in the window, weighted by time
+  * of validity. It is therefore time-aware and needs entries to be contained in
+  * the window.
+  *
+  * The variance is obtained as E_w[X^2] - E_w[X]^2 from two [[MeanAggregator]]s
+  * that are kept in sync on every addition and removal.
+  */
+class StdAggregator[T](implicit n: Numeric[T]) extends TimeAwareReversibleAggregator[T, Double] {
+  import n._
+
+  // Weighted mean of squares E_w[X^2]
+  private val squareMean = new MeanAggregator[T]()
+  // Weighted mean E_w[X]
+  private val mean = new MeanAggregator[T]()
+
+  /** @return the weighted standard deviation, or None as long as one of the
+    *         underlying means is undefined
+    */
+  def currentValue: Option[Double] =
+    for {
+      avg     <- mean.currentValue
+      squares <- squareMean.currentValue
+    } yield {
+      // std = sqrt{ E_w[X^2] - E_w[X]^2 }
+      // Floating point rounding can push the difference marginally below zero
+      // (e.g. for a window of identical values), and Math.sqrt of a negative
+      // number is NaN. Clamp at zero; a NaN difference still propagates as NaN.
+      Math.sqrt(Math.max(squares - avg * avg, 0.0))
+    }
+
+  /** Adds the entry to both the mean and the mean of squares. */
+  def addEntry(entry: TSEntry[T]): Unit = {
+    // NOTE(review): the square is computed in T, so it can overflow for integral
+    // types with large values -- confirm whether this matters for callers.
+    squareMean.addEntry(entry.map(v => v * v))
+    mean.addEntry(entry)
+  }
+
+  /** Removes the entry from both the mean and the mean of squares. */
+  def dropEntry(entry: TSEntry[T]): Unit = {
+    squareMean.dropEntry(entry.map(v => v * v))
+    mean.dropEntry(entry)
+  }
+}
+
+/** A reversible aggregator that keeps track of the minimum of the values
+  * present in the window. You can get a maximum aggregator by simply
+  * reversing the ordering passed as an implicit.
+  *
+  * The aggregator uses an ordered internal queue and discards values that
+  * can never be the minimum: the queue is kept non-decreasing from head to
+  * tail, so its head is the current minimum.
+  */
+class MinAggregator[T](implicit n: Ordering[T]) extends TimeUnawareReversibleAggregator[T, T] {
+  import n._
+
+  private var minQueue = mutable.Queue[T]()
+
+  /** @return the smallest tracked value, or None if nothing is tracked */
+  override def currentValue: Option[T] = minQueue.headOption
+
+  /** Discards every tracked value that is greater than the new one, then
+    * appends the new value at the tail.
+    */
+  def addEntry(e: TSEntry[T]): Unit = {
+    // In Scala 2.13, this can be more elegantly solved:
+    //  minQueue.takeWhileInPlace(_ <= e.value).append(e.value)
+    // The queue is sorted, so the values greater than the new one form a suffix
+    // and keeping the prefix of smaller-or-equal values removes exactly those.
+    minQueue = minQueue.takeWhile(_ <= e.value)
+    minQueue.enqueue(e.value)
+  }
+
+  /** Forgets the head of the queue if it equals the value of the leaving entry.
+    * Otherwise the value was already discarded by a later, smaller value and
+    * nothing has to be done.
+    */
+  def dropEntry(entry: TSEntry[T]): Unit = {
+    // headOption instead of head: dropping while nothing is tracked is a no-op
+    // rather than a NoSuchElementException.
+    if (minQueue.headOption.contains(entry.value)) minQueue.dequeue()
+  }
+}
+
+/** Base class for aggregators that need the whole content of the window: it
+  * stores the value of every entry that was added and not yet dropped. This makes
+  * it inefficient for most calculations.
+  *
+  * Subclasses supply `currentValue`, typically computed from `queue`.
+  */
+abstract class QueueAggregator[T, A] extends TimeUnawareReversibleAggregator[T, A] {
+
+  // Values of the entries currently accounted for, oldest first.
+  private[window] val queue: mutable.Queue[T] = mutable.Queue.empty[T]
+
+  /** Appends the value of the entering entry at the tail of the queue. */
+  def addEntry(e: TSEntry[T]): Unit = {
+    queue.enqueue(e.value)
+  }
+
+  /** Removes the oldest stored value; the passed entry itself is not inspected. */
+  def dropEntry(entry: TSEntry[T]): Unit = {
+    queue.dequeue()
+  }
+}
@@ -0,0 +1,33 @@
+package io.sqooba.oss.timeseries.window
+
+import io.sqooba.oss.timeseries.immutable.TSEntry
+
+import scala.collection.immutable.Queue
+import scala.concurrent.duration.TimeUnit
+
+/** An aggregator that accumulates the result of the integral function of the
+  * entries passed to it.
+  *
+  * Very similar to the [[SumAggregator]], but takes the validity of each entry
+  * into account. It is therefore time-aware and needs entries to be contained in
+  * the window.
+  *
+  * @param timeunit that will be passed to the entries' integral function
+  * @param initialValue to initialise the aggregator with.
+  */
+class IntegralAggregator[T](
+    timeunit: TimeUnit,
+    initialValue: Double = .0
+)(implicit n: Numeric[T])
+    extends TimeAwareReversibleAggregator[T, Double] {
+
+  // Initial value plus the integrals of all entries added and not yet dropped.
+  private var accumulated: Double = initialValue
+
+  /** @return the accumulated integral, always defined */
+  def currentValue: Option[Double] = Some(accumulated)
+
+  /** Adds the integral of the entering entry. */
+  def addEntry(entry: TSEntry[T]): Unit = {
+    accumulated = accumulated + entry.integral(timeunit)
+  }
+
+  /** Subtracts the integral of the leaving entry. */
+  def dropEntry(entry: TSEntry[T]): Unit = {
+    accumulated = accumulated - entry.integral(timeunit)
+  }
+
+}
@@ -0,0 +1,80 @@
+package io.sqooba.oss.timeseries.window
+
+import io.sqooba.oss.timeseries.immutable.TSEntry
+
+import scala.collection.immutable.Queue
+
+/** Extension to the Aggregator that also supports removing entries from the
+  * aggregated value. Assuming we want to aggregate the content of a window, and to
+  * do so for each different window returned by WindowSlider, many iterations will
+  * be required.
+  *
+  * Depending on the aggregation function, this is however not required: For simple
+  * cases like addition or multiplication and any situation where the contributions
+  * of a single entry to the aggregated value may be reversed, we can compute an
+  * aggregated value for each window in linear time.
+  *
+  * The reversible aggregator will be applied sequentially, so it may keep track of
+  * any state from one addition or removal to the next.
+  *
+  * Some aggregations, like integration or averaging, depend on the duration of
+  * the entries; others, like min or max, don't. To keep those types of aggregations
+  * well separated, implementations need to extend either the time-aware or the
+  * time-unaware subtrait. This allows us to use different windowing functions for
+  * the two types.
+  *
+  * @tparam T the type of the entries being aggregated over
+  * @tparam A the type of the aggregated value
+  */
+sealed trait ReversibleAggregator[T, A] extends Aggregator[T, A] {
+
+  /** Updates the aggregated value according to the fact that
+    * the head of the currentWindow is being removed.
+    *
+    * @note By default this passes the head of the window to the function that
+    *       only takes the entry, so the window must not be empty.
+    *
+    * @param currentWindow the current content of the window. It still
+    *                      contains the entry that has to be removed
+    */
+  // TODO: consider returning the resulting aggregated value?
+  def dropHead(currentWindow: Queue[TSEntry[T]]): Unit =
+    dropEntry(currentWindow.head)
+
+  /** Updates the aggregated value according to the fact that
+    * this entry is being removed.
+    *
+    * @param entry to remove from the head of the window
+    */
+  // TODO: consider returning the resulting aggregated value?
+  def dropEntry(entry: TSEntry[T]): Unit
+
+  /** Combine the addition and the removal of entries from the aggregated value.
+    * The head is dropped first, then the addition is done against the tail of
+    * the passed window.
+    *
+    * @note The window must not be empty, as both its head and its tail are taken.
+    *
+    * @param add the value that will be added
+    * @param currentWindow the current window, from which we will drop the first entry.
+    *                      Note that it does not yet contain 'add'
+    */
+  def addAndDrop(add: TSEntry[T], currentWindow: Queue[TSEntry[T]]): Unit = {
+    dropHead(currentWindow)
+    // addEntry needs to work on the updated window
+    addEntry(add, currentWindow.tail)
+  }
+
+  /** Combine the addition and the removal of entries from the aggregated value.
+    * The removal is applied before the addition.
+    *
+    * @param add the entry that will be added at the tail
+    * @param remove the entry that will be removed at the head
+    */
+  def addAndDrop(add: TSEntry[T], remove: TSEntry[T]): Unit = {
+    dropEntry(remove)
+    addEntry(add)
+  }
+}
+
+/** This trait should be extended by all aggregators that depend on the time/duration
+  * in their calculation like integration, averaging over time etc. It adds no
+  * members of its own and only serves to tell the two kinds of aggregators apart.
+  *
+  * Examples in this package: [[MeanAggregator]], [[StdAggregator]] and
+  * [[IntegralAggregator]].
+  */
+trait TimeAwareReversibleAggregator[T, A] extends ReversibleAggregator[T, A]
+
+/** This trait should be extended by all aggregators that don't depend on the
+  * duration in their calculation like min, max, median. It adds no members of
+  * its own and only serves to tell the two kinds of aggregators apart.
+  *
+  * Examples in this package: [[SumAggregator]], [[MinAggregator]] and
+  * [[QueueAggregator]].
+  */
+trait TimeUnawareReversibleAggregator[T, A] extends ReversibleAggregator[T, A]
Original file line number	Diff line number	Diff line change
`@@ -6,7 +6,7 @@ import fi.iki.yak.ts.compression.gorilla.{GorillaDecompressor, LongArrayInput}`
`6`	`6`
`7`	`7`	`import scala.util.Try`
`8`	`8`
`9`		`-/** Provides abstraction and tools for compressing/archiving timeseries data. The`
	`9`	`+/** Provides abstraction and tools for compressing/archiving time series data. The`
`10`	`10`	`* compression used is Gorilla TSC encoding implemented by the Java library`
`11`	`11`	`* [[fi.iki.yak.ts.compression.gorilla]].`
`12`	`12`	`*`