Skip to content

Commit 983d259

Browse files
committed
[SPARK-2954] Fix MLlib _deserialize_double on Python 2.6.
1 parent 5d18fd7 commit 983d259

File tree

1 file changed

+10
-1
lines changed

1 file changed

+10
-1
lines changed

python/pyspark/mllib/_common.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
#
1717

1818
import struct
19+
import sys
1920
import numpy
2021
from numpy import ndarray, float64, int64, int32, array_equal, array
2122
from pyspark import SparkContext, RDD
@@ -78,6 +79,14 @@
7879
LABELED_POINT_MAGIC = 4
7980

8081

82+
# Workaround for SPARK-2954: before Python 2.7, struct.unpack couldn't unpack bytearray()s.
# The implementation is chosen once, at import time, so callers can always
# write _unpack(fmt, data) without caring which interpreter is running.
if sys.version_info[:2] <= (2, 6):
    def _unpack(fmt, string):
        """
        Unpack `string` according to the struct format `fmt`.

        The data is wrapped in buffer() before being handed to
        struct.unpack, so that a bytearray can be passed on interpreters
        whose struct.unpack does not accept one directly.

        Returns whatever struct.unpack returns for the given format.
        """
        return struct.unpack(fmt, buffer(string))
else:
    # No workaround needed on newer interpreters: use struct.unpack as-is.
    _unpack = struct.unpack
88+
89+
8190
def _deserialize_numpy_array(shape, ba, offset, dtype=float64):
8291
"""
8392
Deserialize a numpy array of the given type from an offset in
@@ -191,7 +200,7 @@ def _deserialize_double(ba, offset=0):
191200
raise TypeError("_deserialize_double called on a %s; wanted bytearray" % type(ba))
192201
if len(ba) - offset != 8:
193202
raise TypeError("_deserialize_double called on a %d-byte array; wanted 8 bytes." % nb)
194-
return struct.unpack("d", ba[offset:])[0]
203+
return _unpack("d", ba[offset:])[0]
195204

196205

197206
def _deserialize_double_vector(ba, offset=0):

0 commit comments

Comments
 (0)