|
51 | 51 | CloudPickleSerializer, CompressedSerializer
|
52 | 52 | from pyspark.shuffle import Aggregator, InMemoryMerger, ExternalMerger, ExternalSorter
|
53 | 53 | from pyspark.sql import SQLContext, IntegerType, Row, ArrayType, StructType, StructField, \
|
54 |
| - UserDefinedType, DoubleType |
| 54 | + UserDefinedType, DoubleType, LongType, _infer_type |
55 | 55 | from pyspark import shuffle
|
56 | 56 |
|
57 | 57 | _have_scipy = False
|
@@ -923,6 +923,20 @@ def test_infer_schema(self):
|
923 | 923 | result = self.sqlCtx.sql("SELECT l[0].a from test2 where d['key'].d = '2'")
|
924 | 924 | self.assertEqual(1, result.first()[0])
|
925 | 925 |
|
| 926 | + def test_infer_long_type(self): |
| 927 | + longrow = [Row(f1='a', f2=100000000000000)] |
| 928 | + lrdd = self.sc.parallelize(longrow) |
| 929 | + slrdd = self.sqlCtx.inferSchema(lrdd) |
| 930 | + self.assertEqual(slrdd.schema().fields[1].dataType, LongType()) |
| 931 | + |
| 932 | + self.assertEqual(_infer_type(1), IntegerType()) |
| 933 | + self.assertEqual(_infer_type(2**10), IntegerType()) |
| 934 | + self.assertEqual(_infer_type(2**20), IntegerType()) |
| 935 | + self.assertEqual(_infer_type(2**31 - 1), IntegerType()) |
| 936 | + self.assertEqual(_infer_type(2**31), LongType()) |
| 937 | + self.assertEqual(_infer_type(2**61), LongType()) |
| 938 | + self.assertEqual(_infer_type(2**71), LongType()) |
| 939 | + |
926 | 940 | def test_convert_row_to_dict(self):
|
927 | 941 | row = Row(l=[Row(a=1, b='s')], d={"key": Row(c=1.0, d="2")})
|
928 | 942 | self.assertEqual(1, row.asDict()['l'][0].a)
|
|
0 commit comments