Skip to content

Commit 7dc92c8

Browse files
committed
fixed some comments
1 parent 725cd47 commit 7dc92c8

File tree

2 files changed

+5
-5
lines changed

2 files changed

+5
-5
lines changed

python/pyspark/serializers.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -187,7 +187,8 @@ def loads(self, obj):
187187

188188
class ArrowCollectSerializer(Serializer):
189189
"""
190-
Deserialize a stream of batches followed by batch order information.
190+
Deserialize a stream of batches followed by batch order information. Used in
191+
DataFrame._collectAsArrow() after invoking Dataset.collectAsArrowToPython() in the JVM.
191192
"""
192193

193194
def __init__(self):
@@ -198,9 +199,8 @@ def dump_stream(self, iterator, stream):
198199

199200
def load_stream(self, stream):
200201
"""
201-
Load a stream of un-ordered Arrow RecordBatches, where the last
202-
iteration will yield a list of indices to put the RecordBatches in
203-
the correct order.
202+
Load a stream of un-ordered Arrow RecordBatches, where the last iteration yields
203+
a list of indices that can be used to put the RecordBatches in the correct order.
204204
"""
205205
# load the batches
206206
for batch in self.serializer.load_stream(stream):

sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3179,7 +3179,7 @@ class Dataset[T] private[sql](
31793179
val batchOrder = new ArrayBuffer[(Int, Int)]()
31803180
var partitionCount = 0
31813181

3182-
// Handler to eagerly write batches to Python out of order
3182+
// Handler to eagerly write batches to Python un-ordered
31833183
def handlePartitionBatches(index: Int, arrowBatches: Array[Array[Byte]]): Unit = {
31843184
if (arrowBatches.nonEmpty) {
31853185
// Write all batches (can be more than 1) in the partition, store the batch order tuple

0 commit comments

Comments (0)