Skip to content

Commit 7dc92c8

Browse files
committed
fixed some comments
1 parent 725cd47 commit 7dc92c8

File tree

2 files changed

+5
-5
lines changed

2 files changed

+5
-5
lines changed

python/pyspark/serializers.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -187,7 +187,8 @@ def loads(self, obj):
187187

188188
class ArrowCollectSerializer(Serializer):
189189
"""
190-
Deserialize a stream of batches followed by batch order information.
190+
Deserialize a stream of batches followed by batch order information. Used in
191+
DataFrame._collectAsArrow() after invoking Dataset.collectAsArrowToPython() in the JVM.
191192
"""
192193

193194
def __init__(self):
@@ -198,9 +199,8 @@ def dump_stream(self, iterator, stream):
198199

199200
def load_stream(self, stream):
200201
"""
201-
Load a stream of un-ordered Arrow RecordBatches, where the last
202-
iteration will yield a list of indices to put the RecordBatches in
203-
the correct order.
202+
Load a stream of un-ordered Arrow RecordBatches, where the last iteration yields
203+
a list of indices that can be used to put the RecordBatches in the correct order.
204204
"""
205205
# load the batches
206206
for batch in self.serializer.load_stream(stream):

sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3179,7 +3179,7 @@ class Dataset[T] private[sql](
31793179
val batchOrder = new ArrayBuffer[(Int, Int)]()
31803180
var partitionCount = 0
31813181

3182-
// Handler to eagerly write batches to Python out of order
3182+
// Handler to eagerly write batches to Python un-ordered
31833183
def handlePartitionBatches(index: Int, arrowBatches: Array[Array[Byte]]): Unit = {
31843184
if (arrowBatches.nonEmpty) {
31853185
// Write all batches (can be more than 1) in the partition, store the batch order tuple

0 commit comments

Comments (0)