Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,21 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
// UDFToString
/**
 * Selects, from the child's data type, the function that renders an evaluated
 * child value as a String.
 */
private[this] def castToString: Any => Any = {
  // Variable- and fixed-length byte arrays are both decoded as UTF-8 text.
  def decodeUtf8(bytes: Array[Byte]): String = new String(bytes, "UTF-8")

  child.dataType match {
    case BinaryType | FixedLenByteArrayType(_) =>
      buildCast[Array[Byte]](_, decodeUtf8)
    case TimestampType =>
      buildCast[Timestamp](_, timestampToString)
    case _ =>
      // Every remaining type falls back to the value's own toString.
      buildCast[Any](_, _.toString)
  }
}

// BinaryConverter
/**
 * Selects, from the child's data type, the function that converts an evaluated
 * child value to a byte array. Only string and fixed-length byte array children
 * are matched; any other child data type raises a MatchError.
 */
private[this] def castToBinary: Any => Any = child.dataType match {
  case FixedLenByteArrayType(_) =>
    // Already a byte array: the same instance is handed back unchanged.
    buildCast[Array[Byte]](_, (bytes: Array[Byte]) => bytes)
  case StringType =>
    buildCast[String](_, (text: String) => text.getBytes("UTF-8"))
}

// FixedLenBinaryConverter
/**
 * Selects, from the child's data type, the function that converts an evaluated
 * child value to a byte array of exactly `length` bytes.
 *
 * Inputs longer than `length` are truncated; shorter inputs are right-padded with
 * zero bytes, so the result always has the declared fixed size. Strings are encoded
 * as UTF-8 first; truncating on a byte boundary can cut a multi-byte character.
 * Child data types other than string, binary and fixed-length binary raise a
 * MatchError.
 *
 * @param length the exact number of bytes the resulting array must contain
 */
private[this] def castToFixedLenBinary(length: Int): Any => Any = {
  // Arrays.copyOf truncates or zero-pads to `length` and always returns a new array.
  def fit(bytes: Array[Byte]): Array[Byte] = java.util.Arrays.copyOf(bytes, length)

  child.dataType match {
    case StringType =>
      buildCast[String](_, (text: String) => fit(text.getBytes("UTF-8")))
    case BinaryType | FixedLenByteArrayType(_) =>
      // Also covers re-sizing between fixed-length types of different lengths.
      buildCast[Array[Byte]](_, (bytes: Array[Byte]) => fit(bytes))
  }
}

// UDFToBoolean
Expand Down Expand Up @@ -256,6 +264,8 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
case FloatType => castToFloat
case LongType => castToLong
case DoubleType => castToDouble
case FixedLenByteArrayType(_) =>
castToFixedLenBinary(dataType.asInstanceOf[FixedLenByteArrayType].length)
}

override def eval(input: Row): Any = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,13 @@ object DataType extends RegexParsers {
"BinaryType" ^^^ BinaryType |
"BooleanType" ^^^ BooleanType |
"DecimalType" ^^^ DecimalType |
"TimestampType" ^^^ TimestampType
"TimestampType" ^^^ TimestampType |
fixedLenByteArrayType

/**
 * Parses the textual form `FixedLenByteArrayType(<digits>)` into a
 * FixedLenByteArrayType carrying the parsed length.
 */
protected lazy val fixedLenByteArrayType: Parser[DataType] =
  (("FixedLenByteArrayType" ~> "(" ~> intVal) <~ ")") ^^ { len =>
    FixedLenByteArrayType(len)
  }

protected lazy val arrayType: Parser[DataType] =
"ArrayType" ~> "(" ~> dataType ~ "," ~ boolVal <~ ")" ^^ {
Expand All @@ -60,6 +66,11 @@ object DataType extends RegexParsers {
StructField(name, tpe, nullable = nullable)
}

/**
 * Parses a run of one or more ASCII decimal digits into an integer.
 * The digits are converted with `toInt`, which throws NumberFormatException
 * when the value does not fit in an Int.
 */
protected lazy val intVal: Parser[Integer] =
  "[0-9]+".r ^^ { digits => digits.toInt }

/** Parses the literal `true` or `false` into the corresponding Boolean value. */
protected lazy val boolVal: Parser[Boolean] =
  ("true" ^^^ true) | ("false" ^^^ false)
Expand Down Expand Up @@ -151,6 +162,11 @@ case object BinaryType extends DataType with PrimitiveType {
def simpleString: String = "binary"
}

/**
 * Data type for byte arrays with a declared, fixed size.
 *
 * @param length the declared size in bytes
 */
case class FixedLenByteArrayType(length: Int) extends DataType with PrimitiveType {
  type JvmType = Array[Byte]

  // Interpolation renders the length via Int.toString, so the digits are always ASCII;
  // "%d".format would localize them according to the JVM's default locale.
  def simpleString: String = s"fixed_len_byte_array($length)"
}

case object BooleanType extends NativeType with PrimitiveType {
private[sql] type JvmType = Boolean
@transient private[sql] lazy val tag = ScalaReflectionLock.synchronized { typeTag[JvmType] }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ private[parquet] object ParquetTypesConverter extends Logging {
case ParquetPrimitiveTypeName.INT96 =>
// TODO: add BigInteger type? TODO(andre) use DecimalType instead????
sys.error("Potential loss of precision: cannot convert INT96")
case ParquetPrimitiveTypeName.FIXED_LEN_BYTE_ARRAY =>
FixedLenByteArrayType(parquetType.getTypeLength)
case _ => sys.error(
s"Unsupported parquet datatype $parquetType")
}
Expand Down Expand Up @@ -195,6 +197,7 @@ private[parquet] object ParquetTypesConverter extends Logging {
case ShortType => Some(ParquetPrimitiveTypeName.INT32, None)
case ByteType => Some(ParquetPrimitiveTypeName.INT32, None)
case LongType => Some(ParquetPrimitiveTypeName.INT64, None)
case FixedLenByteArrayType(_) => Some(ParquetPrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, None)
case _ => None
}

Expand Down Expand Up @@ -246,9 +249,13 @@ private[parquet] object ParquetTypesConverter extends Logging {
if (nullable) Repetition.OPTIONAL else Repetition.REQUIRED
}
val primitiveType = fromPrimitiveDataType(ctype)
// Fixed-length byte arrays carry their size in the type itself; 0 for every other type.
val typeLength = ctype match {
  case FixedLenByteArrayType(length) => length
  case _ => 0
}
primitiveType.map {
case (primitiveType, originalType) =>
new ParquetPrimitiveType(repetition, primitiveType, name, originalType.orNull)
new ParquetPrimitiveType(repetition, primitiveType, typeLength, name, originalType.orNull)
}.getOrElse {
ctype match {
case ArrayType(elementType, false) => {
Expand Down