Skip to content

Commit db1c5b1

Browse files
committed
Revert "[SPARK-26248][SQL] Infer date type from CSV"
This reverts commit 5217f7b.
1 parent e408e05 commit db1c5b1

File tree

2 files changed

+4
-34
lines changed

2 files changed

+4
-34
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala

Lines changed: 4 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -22,20 +22,16 @@ import scala.util.control.Exception.allCatch
2222
import org.apache.spark.rdd.RDD
2323
import org.apache.spark.sql.catalyst.analysis.TypeCoercion
2424
import org.apache.spark.sql.catalyst.expressions.ExprUtils
25-
import org.apache.spark.sql.catalyst.util.{DateFormatter, TimestampFormatter}
25+
import org.apache.spark.sql.catalyst.util.TimestampFormatter
2626
import org.apache.spark.sql.types._
2727

2828
class CSVInferSchema(val options: CSVOptions) extends Serializable {
2929

3030
@transient
31-
private lazy val timestampFormatter = TimestampFormatter(
31+
private lazy val timestampParser = TimestampFormatter(
3232
options.timestampFormat,
3333
options.timeZone,
3434
options.locale)
35-
@transient
36-
private lazy val dateFormatter = DateFormatter(
37-
options.dateFormat,
38-
options.locale)
3935

4036
private val decimalParser = {
4137
ExprUtils.getDecimalParser(options.locale)
@@ -108,7 +104,6 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable {
108104
compatibleType(typeSoFar, tryParseDecimal(field)).getOrElse(StringType)
109105
case DoubleType => tryParseDouble(field)
110106
case TimestampType => tryParseTimestamp(field)
111-
case DateType => tryParseDate(field)
112107
case BooleanType => tryParseBoolean(field)
113108
case StringType => StringType
114109
case other: DataType =>
@@ -164,16 +159,9 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable {
164159
}
165160

166161
private def tryParseTimestamp(field: String): DataType = {
167-
if ((allCatch opt timestampFormatter.parse(field)).isDefined) {
162+
// This case applies when the field matches the configured `timestampFormat`.
163+
if ((allCatch opt timestampParser.parse(field)).isDefined) {
168164
TimestampType
169-
} else {
170-
tryParseDate(field)
171-
}
172-
}
173-
174-
private def tryParseDate(field: String): DataType = {
175-
if ((allCatch opt dateFormatter.parse(field)).isDefined) {
176-
DateType
177165
} else {
178166
tryParseBoolean(field)
179167
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala

Lines changed: 0 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -187,22 +187,4 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
187187

188188
Seq("en-US", "ko-KR", "ru-RU", "de-DE").foreach(checkDecimalInfer(_, DecimalType(7, 0)))
189189
}
190-
191-
test("inferring date type") {
192-
var options = new CSVOptions(Map("dateFormat" -> "yyyy/MM/dd"), false, "GMT")
193-
var inferSchema = new CSVInferSchema(options)
194-
assert(inferSchema.inferField(NullType, "2018/12/02") == DateType)
195-
196-
options = new CSVOptions(Map("dateFormat" -> "MMM yyyy"), false, "GMT")
197-
inferSchema = new CSVInferSchema(options)
198-
assert(inferSchema.inferField(NullType, "Dec 2018") == DateType)
199-
200-
options = new CSVOptions(
201-
Map("dateFormat" -> "yyyy-MM-dd", "timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss"),
202-
columnPruning = false,
203-
defaultTimeZoneId = "GMT")
204-
inferSchema = new CSVInferSchema(options)
205-
assert(inferSchema.inferField(NullType, "2018-12-03T11:00:00") == TimestampType)
206-
assert(inferSchema.inferField(NullType, "2018-12-03") == DateType)
207-
}
208190
}

0 commit comments

Comments
 (0)