16
16
*/
17
17
package org .apache .spark .sql .catalyst .encoders
18
18
19
+ import java .{sql => jsql }
19
20
import java .math .{BigDecimal => JBigDecimal , BigInteger => JBigInt }
20
21
import java .time .{Duration , Instant , LocalDate , LocalDateTime , Period }
21
22
22
23
import scala .reflect .{classTag , ClassTag }
23
24
24
- import org .apache .spark .sql .Encoder
25
+ import org .apache .spark .sql .{ Encoder , Row }
25
26
import org .apache .spark .sql .types ._
26
27
import org .apache .spark .unsafe .types .CalendarInterval
27
28
28
29
/**
29
30
* A non-implementation-specific encoder. This encoder contains all the information needed
30
31
* to generate an implementation specific encoder (e.g. InternalRow <=> Custom Object).
32
+ *
33
+ * The input of the serialization does not need to match the external type of the encoder. This is
34
+ * called lenient serialization. An example of this is lenient date serialization; in this case both
35
+ * [[java.sql.Date ]] and [[java.time.LocalDate ]] are allowed. Deserialization is never lenient; it
36
+ * will always produce an instance of the external type.
31
37
*/
32
38
trait AgnosticEncoder [T ] extends Encoder [T ] {
33
39
def isPrimitive : Boolean
34
40
def nullable : Boolean = ! isPrimitive
35
41
def dataType : DataType
36
42
override def schema : StructType = StructType (StructField (" value" , dataType, nullable) :: Nil )
43
+ def lenientSerialization : Boolean = false
37
44
}
38
45
39
- // TODO check RowEncoder
40
- // TODO check BeanEncoder
41
46
object AgnosticEncoders {
42
47
case class OptionEncoder [E ](elementEncoder : AgnosticEncoder [E ])
43
48
extends AgnosticEncoder [Option [E ]] {
@@ -46,35 +51,48 @@ object AgnosticEncoders {
46
51
override val clsTag : ClassTag [Option [E ]] = ClassTag (classOf [Option [E ]])
47
52
}
48
53
49
- case class ArrayEncoder [E ](element : AgnosticEncoder [E ])
54
+ case class ArrayEncoder [E ](element : AgnosticEncoder [E ], containsNull : Boolean )
50
55
extends AgnosticEncoder [Array [E ]] {
51
56
override def isPrimitive : Boolean = false
52
- override def dataType : DataType = ArrayType (element.dataType, element.nullable )
57
+ override def dataType : DataType = ArrayType (element.dataType, containsNull )
53
58
override val clsTag : ClassTag [Array [E ]] = element.clsTag.wrap
54
59
}
55
60
56
- case class IterableEncoder [C <: Iterable [E ], E ](
61
+ /**
62
+ * Encoder for collections.
63
+ *
64
+ * This encoder can be lenient for [[Row ]] encoders. In that case we allow [[Seq ]], primitive
65
+ * arrays (if any), and generic arrays as input.
66
+ */
67
+ case class IterableEncoder [C , E ](
57
68
override val clsTag : ClassTag [C ],
58
- element : AgnosticEncoder [E ])
69
+ element : AgnosticEncoder [E ],
70
+ containsNull : Boolean ,
71
+ override val lenientSerialization : Boolean )
59
72
extends AgnosticEncoder [C ] {
60
73
override def isPrimitive : Boolean = false
61
- override val dataType : DataType = ArrayType (element.dataType, element.nullable )
74
+ override val dataType : DataType = ArrayType (element.dataType, containsNull )
62
75
}
63
76
64
77
case class MapEncoder [C , K , V ](
65
78
override val clsTag : ClassTag [C ],
66
79
keyEncoder : AgnosticEncoder [K ],
67
- valueEncoder : AgnosticEncoder [V ])
80
+ valueEncoder : AgnosticEncoder [V ],
81
+ valueContainsNull : Boolean )
68
82
extends AgnosticEncoder [C ] {
69
83
override def isPrimitive : Boolean = false
70
84
override val dataType : DataType = MapType (
71
85
keyEncoder.dataType,
72
86
valueEncoder.dataType,
73
- valueEncoder.nullable )
87
+ valueContainsNull )
74
88
}
75
89
76
- case class EncoderField (name : String , enc : AgnosticEncoder [_]) {
77
- def structField : StructField = StructField (name, enc.dataType, enc.nullable)
90
+ case class EncoderField (
91
+ name : String ,
92
+ enc : AgnosticEncoder [_],
93
+ nullable : Boolean ,
94
+ metadata : Metadata ) {
95
+ def structField : StructField = StructField (name, enc.dataType, nullable, metadata)
78
96
}
79
97
80
98
// This supports both Product and DefinedByConstructorParams
@@ -87,6 +105,13 @@ object AgnosticEncoders {
87
105
override def dataType : DataType = schema
88
106
}
89
107
108
+ case class RowEncoder (fields : Seq [EncoderField ]) extends AgnosticEncoder [Row ] {
109
+ override def isPrimitive : Boolean = false
110
+ override val schema : StructType = StructType (fields.map(_.structField))
111
+ override def dataType : DataType = schema
112
+ override def clsTag : ClassTag [Row ] = classTag[Row ]
113
+ }
114
+
90
115
// This will only work for encoding from/to Spark's InternalRow format.
91
116
// It is here for compatibility.
92
117
case class UDTEncoder [E >: Null ](
@@ -116,39 +141,74 @@ object AgnosticEncoders {
116
141
}
117
142
118
143
// Primitive encoders
119
- case object PrimitiveBooleanEncoder extends LeafEncoder [Boolean ](BooleanType )
120
- case object PrimitiveByteEncoder extends LeafEncoder [Byte ](ByteType )
121
- case object PrimitiveShortEncoder extends LeafEncoder [Short ](ShortType )
122
- case object PrimitiveIntEncoder extends LeafEncoder [Int ](IntegerType )
123
- case object PrimitiveLongEncoder extends LeafEncoder [Long ](LongType )
124
- case object PrimitiveFloatEncoder extends LeafEncoder [Float ](FloatType )
125
- case object PrimitiveDoubleEncoder extends LeafEncoder [Double ](DoubleType )
144
+ abstract class PrimitiveLeafEncoder [E : ClassTag ](dataType : DataType )
145
+ extends LeafEncoder [E ](dataType)
146
+ case object PrimitiveBooleanEncoder extends PrimitiveLeafEncoder [Boolean ](BooleanType )
147
+ case object PrimitiveByteEncoder extends PrimitiveLeafEncoder [Byte ](ByteType )
148
+ case object PrimitiveShortEncoder extends PrimitiveLeafEncoder [Short ](ShortType )
149
+ case object PrimitiveIntEncoder extends PrimitiveLeafEncoder [Int ](IntegerType )
150
+ case object PrimitiveLongEncoder extends PrimitiveLeafEncoder [Long ](LongType )
151
+ case object PrimitiveFloatEncoder extends PrimitiveLeafEncoder [Float ](FloatType )
152
+ case object PrimitiveDoubleEncoder extends PrimitiveLeafEncoder [Double ](DoubleType )
126
153
127
154
// Primitive wrapper encoders.
128
- case object NullEncoder extends LeafEncoder [java.lang.Void ](NullType )
129
- case object BoxedBooleanEncoder extends LeafEncoder [java.lang.Boolean ](BooleanType )
130
- case object BoxedByteEncoder extends LeafEncoder [java.lang.Byte ](ByteType )
131
- case object BoxedShortEncoder extends LeafEncoder [java.lang.Short ](ShortType )
132
- case object BoxedIntEncoder extends LeafEncoder [java.lang.Integer ](IntegerType )
133
- case object BoxedLongEncoder extends LeafEncoder [java.lang.Long ](LongType )
134
- case object BoxedFloatEncoder extends LeafEncoder [java.lang.Float ](FloatType )
135
- case object BoxedDoubleEncoder extends LeafEncoder [java.lang.Double ](DoubleType )
155
+ abstract class BoxedLeafEncoder [E : ClassTag , P ](
156
+ dataType : DataType ,
157
+ val primitive : PrimitiveLeafEncoder [P ])
158
+ extends LeafEncoder [E ](dataType)
159
+ case object BoxedBooleanEncoder
160
+ extends BoxedLeafEncoder [java.lang.Boolean , Boolean ](BooleanType , PrimitiveBooleanEncoder )
161
+ case object BoxedByteEncoder
162
+ extends BoxedLeafEncoder [java.lang.Byte , Byte ](ByteType , PrimitiveByteEncoder )
163
+ case object BoxedShortEncoder
164
+ extends BoxedLeafEncoder [java.lang.Short , Short ](ShortType , PrimitiveShortEncoder )
165
+ case object BoxedIntEncoder
166
+ extends BoxedLeafEncoder [java.lang.Integer , Int ](IntegerType , PrimitiveIntEncoder )
167
+ case object BoxedLongEncoder
168
+ extends BoxedLeafEncoder [java.lang.Long , Long ](LongType , PrimitiveLongEncoder )
169
+ case object BoxedFloatEncoder
170
+ extends BoxedLeafEncoder [java.lang.Float , Float ](FloatType , PrimitiveFloatEncoder )
171
+ case object BoxedDoubleEncoder
172
+ extends BoxedLeafEncoder [java.lang.Double , Double ](DoubleType , PrimitiveDoubleEncoder )
136
173
137
174
// Nullable leaf encoders
175
+ case object NullEncoder extends LeafEncoder [java.lang.Void ](NullType )
138
176
case object StringEncoder extends LeafEncoder [String ](StringType )
139
177
case object BinaryEncoder extends LeafEncoder [Array [Byte ]](BinaryType )
140
- case object SparkDecimalEncoder extends LeafEncoder [Decimal ](DecimalType .SYSTEM_DEFAULT )
141
- case object ScalaDecimalEncoder extends LeafEncoder [BigDecimal ](DecimalType .SYSTEM_DEFAULT )
142
- case object JavaDecimalEncoder extends LeafEncoder [JBigDecimal ](DecimalType .SYSTEM_DEFAULT )
143
178
case object ScalaBigIntEncoder extends LeafEncoder [BigInt ](DecimalType .BigIntDecimal )
144
179
case object JavaBigIntEncoder extends LeafEncoder [JBigInt ](DecimalType .BigIntDecimal )
145
180
case object CalendarIntervalEncoder extends LeafEncoder [CalendarInterval ](CalendarIntervalType )
146
181
case object DayTimeIntervalEncoder extends LeafEncoder [Duration ](DayTimeIntervalType ())
147
182
case object YearMonthIntervalEncoder extends LeafEncoder [Period ](YearMonthIntervalType ())
148
- case object DateEncoder extends LeafEncoder [java.sql.Date ](DateType )
149
- case object LocalDateEncoder extends LeafEncoder [LocalDate ](DateType )
150
- case object TimestampEncoder extends LeafEncoder [java.sql.Timestamp ](TimestampType )
151
- case object InstantEncoder extends LeafEncoder [Instant ](TimestampType )
183
+ case class DateEncoder (override val lenientSerialization : Boolean )
184
+ extends LeafEncoder [jsql.Date ](DateType )
185
+ case class LocalDateEncoder (override val lenientSerialization : Boolean )
186
+ extends LeafEncoder [LocalDate ](DateType )
187
+ case class TimestampEncoder (override val lenientSerialization : Boolean )
188
+ extends LeafEncoder [jsql.Timestamp ](TimestampType )
189
+ case class InstantEncoder (override val lenientSerialization : Boolean )
190
+ extends LeafEncoder [Instant ](TimestampType )
152
191
case object LocalDateTimeEncoder extends LeafEncoder [LocalDateTime ](TimestampNTZType )
192
+
193
+ case class SparkDecimalEncoder (dt : DecimalType ) extends LeafEncoder [Decimal ](dt)
194
+ case class ScalaDecimalEncoder (dt : DecimalType ) extends LeafEncoder [BigDecimal ](dt)
195
+ case class JavaDecimalEncoder (dt : DecimalType , override val lenientSerialization : Boolean )
196
+ extends LeafEncoder [JBigDecimal ](dt)
197
+
198
+ val STRICT_DATE_ENCODER : DateEncoder = DateEncoder (lenientSerialization = false )
199
+ val STRICT_LOCAL_DATE_ENCODER : LocalDateEncoder = LocalDateEncoder (lenientSerialization = false )
200
+ val STRICT_TIMESTAMP_ENCODER : TimestampEncoder = TimestampEncoder (lenientSerialization = false )
201
+ val STRICT_INSTANT_ENCODER : InstantEncoder = InstantEncoder (lenientSerialization = false )
202
+ val LENIENT_DATE_ENCODER : DateEncoder = DateEncoder (lenientSerialization = true )
203
+ val LENIENT_LOCAL_DATE_ENCODER : LocalDateEncoder = LocalDateEncoder (lenientSerialization = true )
204
+ val LENIENT_TIMESTAMP_ENCODER : TimestampEncoder = TimestampEncoder (lenientSerialization = true )
205
+ val LENIENT_INSTANT_ENCODER : InstantEncoder = InstantEncoder (lenientSerialization = true )
206
+
207
+ val DEFAULT_SPARK_DECIMAL_ENCODER : SparkDecimalEncoder =
208
+ SparkDecimalEncoder (DecimalType .SYSTEM_DEFAULT )
209
+ val DEFAULT_SCALA_DECIMAL_ENCODER : ScalaDecimalEncoder =
210
+ ScalaDecimalEncoder (DecimalType .SYSTEM_DEFAULT )
211
+ val DEFAULT_JAVA_DECIMAL_ENCODER : JavaDecimalEncoder =
212
+ JavaDecimalEncoder (DecimalType .SYSTEM_DEFAULT , lenientSerialization = false )
153
213
}
154
214
0 commit comments