File tree Expand file tree Collapse file tree 7 files changed +34
-34
lines changed
catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv
core/src/main/scala/org/apache/spark/sql
execution/datasources/csv Expand file tree Collapse file tree 7 files changed +34
-34
lines changed Original file line number Diff line number Diff line change 15
15
* limitations under the License.
16
16
*/
17
17
18
- package org .apache .spark .sql .execution . datasources .csv
18
+ package org .apache .spark .sql .catalyst .csv
19
19
20
20
import com .univocity .parsers .csv .CsvParser
21
21
@@ -107,7 +107,7 @@ class CSVHeaderChecker(
107
107
}
108
108
109
109
// This is currently only used to parse CSV with multiLine mode.
110
- private [csv ] def checkHeaderColumnNames (tokenizer : CsvParser ): Unit = {
110
+ private [sql ] def checkHeaderColumnNames (tokenizer : CsvParser ): Unit = {
111
111
assert(options.multiLine, " This method should be executed with multiLine." )
112
112
if (options.headerFlag) {
113
113
val firstRecord = tokenizer.parseNext()
@@ -116,7 +116,7 @@ class CSVHeaderChecker(
116
116
}
117
117
118
118
// This is currently only used to parse CSV with non-multiLine mode.
119
- private [csv ] def checkHeaderColumnNames (lines : Iterator [String ], tokenizer : CsvParser ): Unit = {
119
+ private [sql ] def checkHeaderColumnNames (lines : Iterator [String ], tokenizer : CsvParser ): Unit = {
120
120
assert(! options.multiLine, " This method should not be executed with multiline." )
121
121
// Checking that column names in the header are matched to field names of the schema.
122
122
// The header will be removed from lines.
Original file line number Diff line number Diff line change 18
18
package org .apache .spark .sql .catalyst .csv
19
19
20
20
object CSVUtils {
21
+
22
+ def skipComments (iter : Iterator [String ], options : CSVOptions ): Iterator [String ] = {
23
+ if (options.isCommentSet) {
24
+ val commentPrefix = options.comment.toString
25
+ iter.dropWhile { line =>
26
+ line.trim.isEmpty || line.trim.startsWith(commentPrefix)
27
+ }
28
+ } else {
29
+ iter.dropWhile(_.trim.isEmpty)
30
+ }
31
+ }
32
+
33
+ /**
34
+ * Extracts header and moves iterator forward so that only data remains in it
35
+ */
36
+ def extractHeader (iter : Iterator [String ], options : CSVOptions ): Option [String ] = {
37
+ val nonEmptyLines = skipComments(iter, options)
38
+ if (nonEmptyLines.hasNext) {
39
+ Some (nonEmptyLines.next())
40
+ } else {
41
+ None
42
+ }
43
+ }
44
+
21
45
/**
22
46
* Helper method that converts string representation of a character to actual character.
23
47
* It handles some Java escaped strings and throws exception if given string is longer than one
Original file line number Diff line number Diff line change @@ -22,14 +22,13 @@ import java.util.{Locale, Properties}
22
22
import scala .collection .JavaConverters ._
23
23
24
24
import com .fasterxml .jackson .databind .ObjectMapper
25
- import com .univocity .parsers .csv .CsvParser
26
25
27
26
import org .apache .spark .Partition
28
27
import org .apache .spark .annotation .InterfaceStability
29
28
import org .apache .spark .api .java .JavaRDD
30
29
import org .apache .spark .internal .Logging
31
30
import org .apache .spark .rdd .RDD
32
- import org .apache .spark .sql .catalyst .csv .CSVOptions
31
+ import org .apache .spark .sql .catalyst .csv .{ CSVHeaderChecker , CSVOptions }
33
32
import org .apache .spark .sql .catalyst .json .{CreateJacksonParser , JacksonParser , JSONOptions }
34
33
import org .apache .spark .sql .execution .command .DDLUtils
35
34
import org .apache .spark .sql .execution .datasources .{DataSource , FailureSafeParser }
Original file line number Diff line number Diff line change @@ -34,7 +34,7 @@ import org.apache.spark.internal.Logging
34
34
import org .apache .spark .rdd .{BinaryFileRDD , RDD }
35
35
import org .apache .spark .sql .{Dataset , Encoders , SparkSession }
36
36
import org .apache .spark .sql .catalyst .InternalRow
37
- import org .apache .spark .sql .catalyst .csv .{CSVInferSchema , CSVOptions }
37
+ import org .apache .spark .sql .catalyst .csv .{CSVHeaderChecker , CSVInferSchema , CSVOptions }
38
38
import org .apache .spark .sql .execution .datasources ._
39
39
import org .apache .spark .sql .execution .datasources .text .TextFileFormat
40
40
import org .apache .spark .sql .types .StructType
Original file line number Diff line number Diff line change @@ -26,7 +26,7 @@ import org.apache.hadoop.mapreduce._
26
26
import org .apache .spark .internal .Logging
27
27
import org .apache .spark .sql .{AnalysisException , SparkSession }
28
28
import org .apache .spark .sql .catalyst .InternalRow
29
- import org .apache .spark .sql .catalyst .csv .CSVOptions
29
+ import org .apache .spark .sql .catalyst .csv .{ CSVHeaderChecker , CSVOptions }
30
30
import org .apache .spark .sql .catalyst .util .CompressionCodecs
31
31
import org .apache .spark .sql .execution .datasources ._
32
32
import org .apache .spark .sql .sources ._
Original file line number Diff line number Diff line change @@ -68,32 +68,9 @@ object CSVUtils {
68
68
}
69
69
}
70
70
71
- def skipComments (iter : Iterator [String ], options : CSVOptions ): Iterator [String ] = {
72
- if (options.isCommentSet) {
73
- val commentPrefix = options.comment.toString
74
- iter.dropWhile { line =>
75
- line.trim.isEmpty || line.trim.startsWith(commentPrefix)
76
- }
77
- } else {
78
- iter.dropWhile(_.trim.isEmpty)
79
- }
80
- }
81
-
82
- /**
83
- * Extracts header and moves iterator forward so that only data remains in it
84
- */
85
- def extractHeader (iter : Iterator [String ], options : CSVOptions ): Option [String ] = {
86
- val nonEmptyLines = skipComments(iter, options)
87
- if (nonEmptyLines.hasNext) {
88
- Some (nonEmptyLines.next())
89
- } else {
90
- None
91
- }
92
- }
93
-
94
- /**
95
- * Generates a header from the given row which is null-safe and duplicate-safe.
96
- */
71
+ /**
72
+ * Generates a header from the given row which is null-safe and duplicate-safe.
73
+ */
97
74
def makeSafeHeader (
98
75
row : Array [String ],
99
76
caseSensitive : Boolean ,
Original file line number Diff line number Diff line change @@ -27,7 +27,7 @@ import com.univocity.parsers.csv.CsvParser
27
27
28
28
import org .apache .spark .internal .Logging
29
29
import org .apache .spark .sql .catalyst .InternalRow
30
- import org .apache .spark .sql .catalyst .csv .CSVOptions
30
+ import org .apache .spark .sql .catalyst .csv .{ CSVHeaderChecker , CSVOptions }
31
31
import org .apache .spark .sql .catalyst .expressions .GenericInternalRow
32
32
import org .apache .spark .sql .catalyst .util .{BadRecordException , DateTimeUtils }
33
33
import org .apache .spark .sql .execution .datasources .FailureSafeParser
You can’t perform that action at this time.
0 commit comments