Re-attempt partitionedLoad #140

Draft · wants to merge 13 commits into base: master
1 change: 1 addition & 0 deletions build.sbt
@@ -25,6 +25,7 @@ semanticdbEnabled := true
semanticdbVersion := scalafixSemanticdb.revision

tpolecatScalacOptions ++= Set(ScalacOptions.source3)
tpolecatScalacOptions ~= (_.filterNot(Set(ScalacOptions.warnValueDiscard)))

ThisBuild / scalacOptions ++= Seq("-explaintypes", "-Wconf:msg=annotation:silent")

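For context on the new filter: warnValueDiscard corresponds to -Wvalue-discard, which flags any non-Unit expression whose value is silently dropped, a pattern the new stream wiring presumably trips. A minimal standalone illustration, not from this repo:

    // Under -Wvalue-discard this warns: the Int returned by compute() is
    // thrown away in a Unit position.
    def compute(): Int = 42

    def run(): Unit =
      compute() // warning: discarded non-Unit value of type Int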
113 changes: 85 additions & 28 deletions src/main/scala/uk/sky/kafka/topicloader/TopicLoader.scala
@@ -1,7 +1,7 @@
package uk.sky.kafka.topicloader

import java.lang.{Long => JLong}
import java.util.{List => JList, Map => JMap, Optional}
import java.lang.Long as JLong
import java.util.{List as JList, Map as JMap, Optional}

import akka.Done
import akka.actor.ActorSystem
@@ -10,18 +10,19 @@ import akka.kafka.{ConsumerSettings, Subscriptions}
import akka.stream.OverflowStrategy
import akka.stream.scaladsl.{Flow, Keep, Source}
import cats.data.NonEmptyList
import cats.syntax.bifunctor._
import cats.syntax.option._
import cats.syntax.show._
import cats.syntax.bifunctor.*
import cats.syntax.option.*
import cats.syntax.show.*
import cats.{Bifunctor, Show}
import com.typesafe.scalalogging.LazyLogging
import org.apache.kafka.clients.consumer._
import org.apache.kafka.clients.consumer.*
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization._
import org.apache.kafka.common.serialization.*
import uk.sky.kafka.topicloader.config.{Config, TopicLoaderConfig}

import scala.concurrent.Future
import scala.jdk.CollectionConverters._
import scala.jdk.CollectionConverters.*
import scala.util.Using

object TopicLoader extends TopicLoader {
private[topicloader] case class LogOffsets(lowest: Long, highest: Long)
@@ -76,30 +77,74 @@ trait TopicLoader extends LazyLogging {
strategy: LoadTopicStrategy,
maybeConsumerSettings: Option[ConsumerSettings[Array[Byte], Array[Byte]]] = None
)(implicit system: ActorSystem): Source[ConsumerRecord[K, V], Future[Consumer.Control]] = {
val config =
Config
.loadOrThrow(system.settings.config)
.topicLoader
val config = Config.loadOrThrow(system.settings.config).topicLoader
load(logOffsetsForTopics(topics, strategy, config), config, maybeConsumerSettings)
}

def partitionedLoad[K : Deserializer, V : Deserializer](
topics: NonEmptyList[String],
strategy: LoadTopicStrategy,
maybeConsumerSettings: Option[ConsumerSettings[Array[Byte], Array[Byte]]] = None
)(implicit
system: ActorSystem
): Source[(TopicPartition, Source[ConsumerRecord[K, V], Future[Consumer.Control]]), Consumer.Control] = {
val config = Config.loadOrThrow(system.settings.config).topicLoader
Consumer
.plainPartitionedSource(
consumerSettings(maybeConsumerSettings, config),
Subscriptions.topics(topics.toList.toSet)
)
.buffer(config.bufferSize.value, OverflowStrategy.backpressure)
.idleTimeout(config.idleTimeout)
.map { case (partition, _) =>
(
partition,
load[K, V](
logOffsetsForPartitions(NonEmptyList.one(partition), strategy, config),
config,
maybeConsumerSettings
)
)
}
}
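A minimal caller sketch for the new partitionedLoad (the topic name, deserializer wiring, and the flatMapMerge flattening are illustrative assumptions, not part of this PR):

    import akka.actor.ActorSystem
    import akka.stream.scaladsl.Sink
    import cats.data.NonEmptyList
    import org.apache.kafka.common.serialization.{Deserializer, StringDeserializer}
    import uk.sky.kafka.topicloader.*

    implicit val system: ActorSystem             = ActorSystem("partitioned-load-sketch")
    implicit val stringDes: Deserializer[String] = new StringDeserializer

    // Each element pairs a TopicPartition with a source that loads only that
    // partition and completes once its highest offset is reached.
    TopicLoader
      .partitionedLoad[String, String](NonEmptyList.one("my-topic"), LoadAll)
      .flatMapMerge(breadth = 8, { case (_, perPartition) => perPartition })
      .runWith(Sink.foreach(r => println(s"${r.key} -> ${r.value}")))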

def partitionedLoadAndRun[K : Deserializer, V : Deserializer](
topics: NonEmptyList[String],
maybeConsumerSettings: Option[ConsumerSettings[Array[Byte], Array[Byte]]] = None
)(implicit
system: ActorSystem
): Source[
(TopicPartition, Source[ConsumerRecord[K, V], (Future[Done], Future[Consumer.Control])]),
Consumer.Control
] = {
val config = Config.loadOrThrow(system.settings.config).topicLoader

Consumer
.plainPartitionedSource(
consumerSettings(maybeConsumerSettings, config),
Subscriptions.topics(topics.toList.toSet)
)
.map { case (partition, _) =>
(
partition,
loadAndRun(
logOffsetsForPartitions(NonEmptyList.one(partition), LoadAll, config),
config,
maybeConsumerSettings
)
)
}
}
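Similarly for partitionedLoadAndRun, a sketch of consuming each partition's source together with its load-complete callback (Sink.ignore, the topic name, and the println are placeholders):

    import akka.actor.ActorSystem
    import akka.stream.scaladsl.Sink
    import cats.data.NonEmptyList
    import org.apache.kafka.common.serialization.{Deserializer, StringDeserializer}
    import uk.sky.kafka.topicloader.TopicLoader

    implicit val system: ActorSystem             = ActorSystem("partitioned-load-and-run-sketch")
    implicit val stringDes: Deserializer[String] = new StringDeserializer

    // For each partition, run its inner source; the materialised Future[Done]
    // completes when the initial load has caught up, while live records keep
    // flowing through the stream.
    TopicLoader
      .partitionedLoadAndRun[String, String](NonEmptyList.one("my-topic"))
      .runForeach { case (partition, perPartition) =>
        val (loaded, _) = perPartition.to(Sink.ignore).run()
        loaded.foreach(_ => println(s"$partition finished initial load"))(system.dispatcher)
      }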

/** Source that loads the specified topics from the beginning. When the latest current offsets are reached, the
* materialised value is completed, and the stream continues.
*/
def loadAndRun[K : Deserializer, V : Deserializer](
topics: NonEmptyList[String],
maybeConsumerSettings: Option[ConsumerSettings[Array[Byte], Array[Byte]]] = None
)(implicit system: ActorSystem): Source[ConsumerRecord[K, V], (Future[Done], Future[Consumer.Control])] = {
val config = Config.loadOrThrow(system.settings.config).topicLoader
val logOffsetsF = logOffsetsForTopics(topics, LoadAll, config)
val postLoadingSource = Source.futureSource(logOffsetsF.map { logOffsets =>
val highestOffsets = logOffsets.map { case (p, o) => p -> o.highest }
kafkaSource[K, V](highestOffsets, config, maybeConsumerSettings)
}(system.dispatcher))

load[K, V](logOffsetsF, config, maybeConsumerSettings)
.watchTermination()(Keep.right)
.concatMat(postLoadingSource)(Keep.both)
val config = Config.loadOrThrow(system.settings.config).topicLoader
loadAndRun(logOffsetsForTopics(topics, LoadAll, config), config, maybeConsumerSettings)
}
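Illustrative use of the materialised pair (this mirrors the probe-based test further down; the topic name is a placeholder):

    import akka.actor.ActorSystem
    import akka.stream.scaladsl.{Keep, Sink}
    import cats.data.NonEmptyList
    import org.apache.kafka.common.serialization.{Deserializer, StringDeserializer}
    import uk.sky.kafka.topicloader.TopicLoader

    implicit val system: ActorSystem             = ActorSystem("load-and-run-sketch")
    implicit val stringDes: Deserializer[String] = new StringDeserializer

    // loadComplete finishes once the offsets that were highest at start-up have
    // been reached; control can later stop the consumer; streamDone tracks the sink.
    val ((loadComplete, control), streamDone) =
      TopicLoader
        .loadAndRun[String, String](NonEmptyList.one("my-topic"))
        .toMat(Sink.ignore)(Keep.both)
        .run()

    loadComplete.foreach(_ => println("initial load complete"))(system.dispatcher)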

protected def logOffsetsForPartitions(
@@ -157,6 +202,21 @@
}
}

protected def loadAndRun[K : Deserializer, V : Deserializer](
logOffsets: Future[Map[TopicPartition, LogOffsets]],
config: TopicLoaderConfig,
maybeConsumerSettings: Option[ConsumerSettings[Array[Byte], Array[Byte]]]
)(implicit system: ActorSystem): Source[ConsumerRecord[K, V], (Future[Done], Future[Consumer.Control])] = {
val postLoadingSource = Source.futureSource(logOffsets.map { logOffsets =>
val highestOffsets = logOffsets.map { case (p, o) => p -> o.highest }
kafkaSource[K, V](highestOffsets, config, maybeConsumerSettings)
}(system.dispatcher))

load[K, V](logOffsets, config, maybeConsumerSettings)
.watchTermination()(Keep.right)
.concatMat(postLoadingSource)(Keep.both)
}
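The shape of the materialised pair comes from this combination of watchTermination and concatMat; reduced to plain Akka Streams with illustrative stand-in sources:

    import akka.{Done, NotUsed}
    import akka.stream.scaladsl.{Keep, Source}

    import scala.concurrent.Future

    val loading: Source[Int, NotUsed] = Source(1 to 3) // stand-in for load(...)
    val live: Source[Int, NotUsed]    = Source(4 to 6) // stand-in for the post-loading kafkaSource

    // watchTermination materialises a Future[Done] that completes when loading
    // finishes; concatMat(Keep.both) pairs it with live's materialised value and
    // then emits live's elements after loading is exhausted.
    val combined: Source[Int, (Future[Done], NotUsed)] =
      loading.watchTermination()(Keep.right).concatMat(live)(Keep.both)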

protected def load[K : Deserializer, V : Deserializer](
logOffsets: Future[Map[TopicPartition, LogOffsets]],
config: TopicLoaderConfig,
@@ -202,7 +262,7 @@
startingOffsets: Map[TopicPartition, Long],
config: TopicLoaderConfig,
maybeConsumerSettings: Option[ConsumerSettings[Array[Byte], Array[Byte]]]
)(implicit system: ActorSystem) =
)(implicit system: ActorSystem): Source[ConsumerRecord[K, V], Consumer.Control] =
Consumer
.plainSource(consumerSettings(maybeConsumerSettings, config), Subscriptions.assignmentWithOffset(startingOffsets))
.buffer(config.bufferSize.value, OverflowStrategy.backpressure)
@@ -224,11 +284,8 @@

private def withStandaloneConsumer[T](
settings: ConsumerSettings[Array[Byte], Array[Byte]]
)(f: Consumer[Array[Byte], Array[Byte]] => T): T = {
val consumer = settings.createKafkaConsumer()
try f(consumer)
finally consumer.close()
}
)(f: Consumer[Array[Byte], Array[Byte]] => T): T =
Using.resource(settings.createKafkaConsumer())(f)

private def offsetsFrom(partitions: List[TopicPartition])(
f: JList[TopicPartition] => JMap[TopicPartition, JLong]
16 changes: 13 additions & 3 deletions src/test/scala/base/IntegrationSpecBase.scala
@@ -7,21 +7,21 @@ import akka.actor.ActorSystem
import akka.kafka.ConsumerSettings
import akka.util.Timeout
import cats.data.NonEmptyList
import cats.syntax.option._
import cats.syntax.option.*
import com.typesafe.config.ConfigFactory
import io.github.embeddedkafka.Codecs.{stringDeserializer, stringSerializer}
import io.github.embeddedkafka.{EmbeddedKafka, EmbeddedKafkaConfig}
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.consumer.{Consumer, ConsumerConfig, ConsumerRecord, ConsumerRecords}
import org.apache.kafka.clients.producer.ProducerConfig
import org.apache.kafka.clients.producer.{ProducerConfig, ProducerRecord}
import org.apache.kafka.common.TopicPartition
import org.scalatest.Assertion
import org.scalatest.concurrent.Eventually
import utils.RandomPort

import scala.annotation.tailrec
import scala.concurrent.duration.DurationInt
import scala.jdk.CollectionConverters._
import scala.jdk.CollectionConverters.*

abstract class IntegrationSpecBase extends WordSpecBase with Eventually {

@@ -79,6 +79,11 @@ abstract class IntegrationSpecBase extends WordSpecBase with Eventually {

def recordToTuple[K, V](record: ConsumerRecord[K, V]): (K, V) = (record.key(), record.value())

def sourceFromPartition[T](
sources: Seq[(TopicPartition, T)],
partition: Int
): T = sources.find { case (part, _) => part.partition() == partition }.map { case (_, source) => source }.value

val testTopic1 = "load-state-topic-1"
val testTopic2 = "load-state-topic-2"
val testTopicPartitions = 5
@@ -96,6 +101,11 @@ abstract class IntegrationSpecBase extends WordSpecBase with Eventually {
publishToKafka(topic, messages)
publishToKafka(topic, filler)
}

def publishToKafka(topic: String, partition: Int, messages: Seq[(String, String)]): Unit =
messages.foreach { case (k, v) =>
publishToKafka(new ProducerRecord[String, String](topic, partition, k, v))
}
}

trait KafkaConsumer { this: TestContext =>
73 changes: 71 additions & 2 deletions src/test/scala/integration/TopicLoaderIntSpec.scala
@@ -2,16 +2,18 @@ package integration

import java.util.concurrent.TimeoutException as JavaTimeoutException

import akka.Done
import akka.actor.ActorSystem
import akka.kafka.ConsumerSettings
import akka.stream.scaladsl.{Keep, Sink}
import akka.kafka.scaladsl.Consumer
import akka.stream.scaladsl.{Keep, Sink, Source}
import akka.stream.testkit.scaladsl.TestSink
import base.IntegrationSpecBase
import cats.data.NonEmptyList
import cats.syntax.option.*
import com.typesafe.config.{ConfigException, ConfigFactory}
import io.github.embeddedkafka.Codecs.{stringDeserializer, stringSerializer}
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.common.errors.TimeoutException as KafkaTimeoutException
import org.apache.kafka.common.serialization.ByteArrayDeserializer
import org.scalatest.prop.TableDrivenPropertyChecks.*
@@ -59,6 +61,32 @@ class TopicLoaderIntSpec extends IntegrationSpecBase {
loadedRecords.map(recordToTuple) should contain theSameElementsAs published
}
}

"stream all records from all topics and emit a source per partition" in new TestContext {
val topics = NonEmptyList.one(testTopic1)
val (forPartition1, forPartition2) = records(1 to 15).splitAt(10)
val partitions: Long = 2

withRunningKafka {
createCustomTopics(topics, partitions.toInt)

publishToKafka(testTopic1, 0, forPartition1)
publishToKafka(testTopic1, 1, forPartition2)

val partitionedSources =
TopicLoader.partitionedLoad[String, String](topics, strategy).take(partitions).runWith(Sink.seq).futureValue

sourceFromPartition(partitionedSources, 0)
.runWith(Sink.seq)
.futureValue
.map(recordToTuple) should contain theSameElementsAs forPartition1

sourceFromPartition(partitionedSources, 1)
.runWith(Sink.seq)
.futureValue
.map(recordToTuple) should contain theSameElementsAs forPartition2
}
}
}

"using LoadCommitted strategy" should {
@@ -247,6 +275,47 @@
}
}
}

"execute callback when finished loading and keep streaming per partition" in new TestContext {
val (preLoadPart1, postLoadPart1) = records(1 to 15).splitAt(10)
val (preLoadPart2, postLoadPart2) = records(16 to 30).splitAt(10)
val partitions: Long = 2

withRunningKafka {
createCustomTopic(testTopic1, partitions = partitions.toInt)

publishToKafka(testTopic1, 0, preLoadPart1)
publishToKafka(testTopic1, 1, preLoadPart2)

val partitionedStream = TopicLoader
Contributor:
Does this test fail if this were to use just loadAndRun?

Member (author):
Do you mean if we publish to one partition and then the other? It still passes with loadAndRun:

    "execute callback when finished loading and keep streaming per partition" in new TestContext {
      val (preLoadPart1, postLoadPart1) = records(1 to 15).splitAt(10)
      val (preLoadPart2, postLoadPart2) = records(16 to 30).splitAt(10)
      val partitions: Long              = 2

      withRunningKafka {
        createCustomTopic(testTopic1, partitions = partitions.toInt)

        publishToKafka(testTopic1, 0, preLoadPart1)
        publishToKafka(testTopic1, 1, preLoadPart2)

        val ((callback, _), recordsProbe) =
          TopicLoader.loadAndRun[String, String](NonEmptyList.one(testTopic1)).toMat(TestSink.probe)(Keep.both).run()

        recordsProbe.request(
          preLoadPart1.size.toLong + postLoadPart1.size.toLong + preLoadPart2.size.toLong + postLoadPart2.size.toLong
        )
        recordsProbe
          .expectNextN(preLoadPart1.size.toLong + preLoadPart2.size.toLong)
          .map(recordToTuple) should contain theSameElementsAs preLoadPart1 ++ preLoadPart2

        whenReady(callback) { _ =>
          publishToKafka(testTopic1, 0, postLoadPart1)
          publishToKafka(testTopic1, 1, postLoadPart2)

          recordsProbe
            .expectNextN(postLoadPart1.size.toLong + postLoadPart2.size.toLong)
            .map(recordToTuple) should contain theSameElementsAs postLoadPart1 ++ postLoadPart2
        }
      }
    }

Contributor:
Yeah, so my point is I don't think we have a test that proves this partitioned-load functionality, considering the test passes even with loadAndRun.

.partitionedLoadAndRun[String, String](NonEmptyList.one(testTopic1))
.take(partitions)
.runWith(Sink.seq)
.futureValue

def validate(
source: Source[ConsumerRecord[String, String], (Future[Done], Future[Consumer.Control])],
partition: Int,
preLoad: Seq[(String, String)],
postLoad: Seq[(String, String)]
): Unit = {

val ((callback, _), recordsProbe) = source.toMat(TestSink.probe)(Keep.both).run()

recordsProbe.request(preLoad.size.toLong + postLoad.size.toLong)
recordsProbe.expectNextN(preLoad.size.toLong).map(recordToTuple) shouldBe preLoad

whenReady(callback) { _ =>
publishToKafka(testTopic1, partition, postLoad)

recordsProbe.expectNextN(postLoad.size.toLong).map(recordToTuple) shouldBe postLoad
}
}

validate(sourceFromPartition(partitionedStream, 0), 0, preLoadPart1, postLoadPart1)
validate(sourceFromPartition(partitionedStream, 1), 1, preLoadPart2, postLoadPart2)
}
}
}

"consumerSettings" should {
Expand Down