
Commit 73c5d79

Python type errors should print type, not object
* Replaced some related exceptions with TypeError
1 parent 0424da6 commit 73c5d79

10 files changed (+23 / -21 lines)

python/pyspark/accumulators.py

Lines changed: 1 addition & 1 deletion
@@ -83,7 +83,7 @@
 >>> sc.accumulator([1.0, 2.0, 3.0]) # doctest: +IGNORE_EXCEPTION_DETAIL
 Traceback (most recent call last):
     ...
-Exception:...
+TypeError:...
 """

 import sys

python/pyspark/context.py

Lines changed: 1 addition & 1 deletion
@@ -671,7 +671,7 @@ def accumulator(self, value, accum_param=None):
         elif isinstance(value, complex):
             accum_param = accumulators.COMPLEX_ACCUMULATOR_PARAM
         else:
-            raise Exception("No default accumulator param for type %s" % type(value))
+            raise TypeError("No default accumulator param for type %s" % type(value))
         SparkContext._next_accum_id += 1
         return Accumulator(SparkContext._next_accum_id - 1, value, accum_param)
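After this change, requesting an accumulator for a value type with no default AccumulatorParam fails with a TypeError that names the type instead of a bare Exception. A minimal sketch, assuming a local SparkContext (the app name is made up):

    from pyspark import SparkContext

    sc = SparkContext("local", "accumulator-demo")  # hypothetical app name
    try:
        sc.accumulator({"a": 1})  # dict has no default accumulator param
    except TypeError as e:
        print(e)  # No default accumulator param for type <type 'dict'>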

python/pyspark/ml/param/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ class Param(object):

     def __init__(self, parent, name, doc):
         if not isinstance(parent, Params):
-            raise ValueError("Parent must be a Params but got type %s." % type(parent).__name__)
+            raise TypeError("Parent must be a Params but got type %s." % type(parent))
         self.parent = parent
         self.name = str(name)
         self.doc = str(doc)
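A quick sketch of the tightened check in Param.__init__: a parent that is not a Params instance now raises TypeError, and the message prints the full type rather than just its name. The argument values below are illustrative:

    from pyspark.ml.param import Param

    try:
        Param("not a Params instance", "maxIter", "maximum number of iterations")
    except TypeError as e:
        print(e)  # Parent must be a Params but got type <type 'str'>.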

python/pyspark/ml/pipeline.py

Lines changed: 2 additions & 2 deletions
@@ -131,8 +131,8 @@ def fit(self, dataset, params={}):
         stages = paramMap[self.stages]
         for stage in stages:
             if not (isinstance(stage, Estimator) or isinstance(stage, Transformer)):
-                raise ValueError(
-                    "Cannot recognize a pipeline stage of type %s." % type(stage).__name__)
+                raise TypeError(
+                    "Cannot recognize a pipeline stage of type %s." % type(stage))
         indexOfLastEstimator = -1
         for i, stage in enumerate(stages):
             if isinstance(stage, Estimator):
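The same pattern in Pipeline.fit: a stage that is neither an Estimator nor a Transformer is rejected with TypeError. A hedged sketch; `training_df` is a hypothetical stand-in for any training DataFrame:

    from pyspark.ml import Pipeline

    pipeline = Pipeline(stages=["not a stage"])  # a plain string is not a pipeline stage
    try:
        pipeline.fit(training_df)  # training_df: hypothetical input DataFrame
    except TypeError as e:
        print(e)  # Cannot recognize a pipeline stage of type <type 'str'>.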

python/pyspark/mllib/linalg.py

Lines changed: 2 additions & 2 deletions
@@ -145,7 +145,7 @@ def serialize(self, obj):
             values = [float(v) for v in obj]
             return (1, None, None, values)
         else:
-            raise ValueError("cannot serialize %r of type %r" % (obj, type(obj)))
+            raise TypeError("cannot serialize %r of type %r" % (obj, type(obj)))

     def deserialize(self, datum):
         assert len(datum) == 4, \
@@ -561,7 +561,7 @@ def __getitem__(self, index):
         inds = self.indices
         vals = self.values
         if not isinstance(index, int):
-            raise ValueError(
+            raise TypeError(
                 "Indices must be of type integer, got type %s" % type(index))
         if index < 0:
             index += self.size
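For SparseVector indexing this splits the failure modes: an out-of-range integer index still raises ValueError, while a non-integer index now raises TypeError. A minimal, locally runnable sketch:

    from pyspark.mllib.linalg import SparseVector

    sv = SparseVector(4, {1: 1.0, 3: 5.5})
    try:
        sv[7.8]  # non-integer index
    except TypeError as e:
        print(e)  # Indices must be of type integer, got type <type 'float'>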

python/pyspark/mllib/regression.py

Lines changed: 1 addition & 1 deletion
@@ -170,7 +170,7 @@ def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
     from pyspark.mllib.classification import LogisticRegressionModel
     first = data.first()
     if not isinstance(first, LabeledPoint):
-        raise ValueError("data should be an RDD of LabeledPoint, but got %s" % first)
+        raise TypeError("data should be an RDD of LabeledPoint, but got %s" % type(first))
     if initial_weights is None:
         initial_weights = [0.0] * len(data.first().features)
     if (modelClass == LogisticRegressionModel):
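The regression training wrapper now reports the offending element type instead of dumping the element itself. A hedged sketch, assuming a local SparkContext and the 1.x LinearRegressionWithSGD API:

    from pyspark import SparkContext
    from pyspark.mllib.regression import LinearRegressionWithSGD

    sc = SparkContext("local", "regression-demo")   # hypothetical app name
    bad_rdd = sc.parallelize([(1.0, [1.0, 2.0])])   # tuples, not LabeledPoint
    try:
        LinearRegressionWithSGD.train(bad_rdd)
    except TypeError as e:
        print(e)  # data should be an RDD of LabeledPoint, but got <type 'tuple'>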

python/pyspark/mllib/tests.py

Lines changed: 4 additions & 2 deletions
@@ -135,8 +135,10 @@ def test_sparse_vector_indexing(self):
         self.assertEquals(sv[-1], 2)
         self.assertEquals(sv[-2], 0)
         self.assertEquals(sv[-4], 0)
-        for ind in [4, -5, 7.8]:
+        for ind in [4, -5]:
             self.assertRaises(ValueError, sv.__getitem__, ind)
+        for ind in [7.8, '1']:
+            self.assertRaises(TypeError, sv.__getitem__, ind)

     def test_matrix_indexing(self):
         mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10])
@@ -450,7 +452,7 @@ def test_infer_schema(self):
             elif isinstance(v, DenseVector):
                 self.assertEqual(v, self.dv1)
             else:
-                raise ValueError("expecting a vector but got %r of type %r" % (v, type(v)))
+                raise TypeError("expecting a vector but got %r of type %r" % (v, type(v)))


 @unittest.skipIf(not _have_scipy, "SciPy not installed")

python/pyspark/sql/_types.py

Lines changed: 6 additions & 6 deletions
@@ -562,8 +562,8 @@ def _infer_type(obj):
     else:
         try:
             return _infer_schema(obj)
-        except ValueError:
-            raise ValueError("not supported type: %s" % type(obj))
+        except TypeError:
+            raise TypeError("not supported type: %s" % type(obj))


 def _infer_schema(row):
@@ -584,7 +584,7 @@ def _infer_schema(row):
         items = sorted(row.__dict__.items())

     else:
-        raise ValueError("Can not infer schema for type: %s" % type(row))
+        raise TypeError("Can not infer schema for type: %s" % type(row))

     fields = [StructField(k, _infer_type(v), True) for k, v in items]
     return StructType(fields)
@@ -696,7 +696,7 @@ def _merge_type(a, b):
         return a
     elif type(a) is not type(b):
         # TODO: type cast (such as int -> long)
-        raise TypeError("Can not merge type %s and %s" % (a, b))
+        raise TypeError("Can not merge type %s and %s" % (type(a), type(b)))

     # same type
     if isinstance(a, StructType):
@@ -773,7 +773,7 @@ def convert_struct(obj):
         elif hasattr(obj, "__dict__"):  # object
             d = obj.__dict__
         else:
-            raise ValueError("Unexpected obj: %s" % obj)
+            raise TypeError("Unexpected obj type: %s" % type(obj))

         if convert_fields:
             return tuple([conv(d.get(name)) for name, conv in zip(names, converters)])
@@ -912,7 +912,7 @@ def _infer_schema_type(obj, dataType):
         return StructType(fields)

     else:
-        raise ValueError("Unexpected dataType: %s" % dataType)
+        raise TypeError("Unexpected dataType: %s" % type(dataType))


 _acceptable_types = {
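In the schema-inference path, unsupported Python types now consistently surface as TypeError, and _merge_type reports the conflicting type objects rather than their string forms. A rough illustration against the internal _infer_type helper; the import path is an assumption (on this branch _types.py is typically exposed as pyspark.sql.types), so treat this only as a sketch:

    from pyspark.sql.types import _infer_type  # assumed public module alias for _types.py

    try:
        _infer_type(object())  # no Spark SQL type maps to a bare object
    except TypeError as e:
        print(e)  # not supported type: <type 'object'>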

python/pyspark/sql/context.py

Lines changed: 4 additions & 4 deletions
@@ -208,7 +208,7 @@ def applySchema(self, rdd, schema):
             raise TypeError("Cannot apply schema to DataFrame")

         if not isinstance(schema, StructType):
-            raise TypeError("schema should be StructType, but got %s" % schema)
+            raise TypeError("schema should be StructType, but got %s" % type(schema))

         return self.createDataFrame(rdd, schema)

@@ -281,7 +281,7 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
                 # data could be list, tuple, generator ...
                 rdd = self._sc.parallelize(data)
             except Exception:
-                raise ValueError("cannot create an RDD from type: %s" % type(data))
+                raise TypeError("cannot create an RDD from type: %s" % type(data))
         else:
            rdd = data

@@ -293,8 +293,8 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
         if isinstance(schema, (list, tuple)):
             first = rdd.first()
             if not isinstance(first, (list, tuple)):
-                raise ValueError("each row in `rdd` should be list or tuple, "
-                                 "but got %r" % type(first))
+                raise TypeError("each row in `rdd` should be list or tuple, "
+                                "but got %r" % type(first))
             row_cls = Row(*schema)
             schema = self._inferSchema(rdd.map(lambda r: row_cls(*r)), samplingRatio)

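createDataFrame now signals bad input with TypeError in both places: data that cannot be turned into an RDD, and rows that are not list/tuple when the schema is a list of column names. A hedged sketch assuming a local SQLContext (app name is made up):

    from pyspark import SparkContext
    from pyspark.sql import SQLContext

    sc = SparkContext("local", "createDataFrame-demo")  # hypothetical app name
    sqlContext = SQLContext(sc)
    try:
        sqlContext.createDataFrame(42)  # an int cannot be parallelized into an RDD
    except TypeError as e:
        print(e)  # cannot create an RDD from type: <type 'int'>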

python/pyspark/sql/dataframe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -608,7 +608,7 @@ def __getitem__(self, item):
608608
jc = self._jdf.apply(self.columns[item])
609609
return Column(jc)
610610
else:
611-
raise TypeError("unexpected type: %s" % type(item))
611+
raise TypeError("unexpected item type: %s" % type(item))
612612

613613
def __getattr__(self, name):
614614
"""Returns the :class:`Column` denoted by ``name``.
