@@ -758,6 +758,32 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
758
758
rdd.saveAsHadoopDataset(conf)
759
759
}
760
760
761
/**
 * Repartition the RDD according to the given partitioner and sort the records of every
 * resulting partition by key, using the keys' natural ordering.
 *
 * Doing both steps in one call is more efficient than calling `repartition` and then sorting
 * within each partition, because the sort can be pushed down into the shuffle machinery.
 *
 * The natural ordering is obtained from Guava and cast to `Comparator[K]`; the cast is
 * unchecked, so this presumably requires `K` to be `Comparable` (TODO confirm).
 *
 * @param partitioner the partitioner forwarded to the comparator-taking overload
 * @return the result of the comparator-taking overload called with the natural ordering
 */
def repartitionAndSortWithinPartition(partitioner: Partitioner): JavaPairRDD[K, V] = {
  val naturalKeyOrder: Comparator[K] =
    com.google.common.collect.Ordering.natural().asInstanceOf[Comparator[K]]
  repartitionAndSortWithinPartition(partitioner, naturalKeyOrder)
}
772
+
773
/**
 * Repartition the RDD according to the given partitioner and, within each resulting partition,
 * sort records by their keys using the supplied comparator.
 *
 * This is more efficient than calling `repartition` and then sorting within each partition
 * because it can push the sorting down into the shuffle machinery.
 *
 * @param partitioner the partitioner passed to the underlying
 *                    `OrderedRDDFunctions.repartitionAndSortWithinPartition` call
 * @param comp comparator over keys of type `K`; it is exposed as the implicit key ordering
 *             for the underlying call
 * @return the repartitioned RDD, wrapped via `fromRDD`
 */
def repartitionAndSortWithinPartition(partitioner: Partitioner, comp: Comparator[K])
  : JavaPairRDD[K, V] = {
  // `Comparator` must be parameterized with K: the implicit Comparator -> Ordering conversion
  // can only supply the `Ordering[K]` that OrderedRDDFunctions[K, V, (K, V)] needs from a
  // `Comparator[K]`.
  implicit val ordering = comp // Allow implicit conversion of Comparator to Ordering.
  fromRDD(
    new OrderedRDDFunctions[K, V, (K, V)](rdd).repartitionAndSortWithinPartition(partitioner))
}
786
+
761
787
/**
762
788
* Sort the RDD by key, so that each partition contains a sorted range of the elements in
763
789
* ascending order. Calling `collect` or `save` on the resulting RDD will return or output an
0 commit comments