-
Notifications
You must be signed in to change notification settings - Fork 28.6k
[SPARK-23765][SQL] Supports custom line separator for json datasource #20877
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -18,6 +18,7 @@ | |||||||||||||
package org.apache.spark.sql.catalyst.json | ||||||||||||||
|
||||||||||||||
import java.io.Writer | ||||||||||||||
import java.nio.charset.StandardCharsets | ||||||||||||||
|
||||||||||||||
import com.fasterxml.jackson.core._ | ||||||||||||||
|
||||||||||||||
|
@@ -74,6 +75,8 @@ private[sql] class JacksonGenerator( | |||||||||||||
|
||||||||||||||
private val gen = new JsonFactory().createGenerator(writer).setRootValueSeparator(null) | ||||||||||||||
|
||||||||||||||
private val lineSeparator: String = options.lineSeparatorInWrite | ||||||||||||||
|
||||||||||||||
private def makeWriter(dataType: DataType): ValueWriter = dataType match { | ||||||||||||||
case NullType => | ||||||||||||||
(row: SpecializedGetters, ordinal: Int) => | ||||||||||||||
|
@@ -251,5 +254,8 @@ private[sql] class JacksonGenerator( | |||||||||||||
mapType = dataType.asInstanceOf[MapType])) | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
def writeLineEnding(): Unit = gen.writeRaw('\n') | ||||||||||||||
def writeLineEnding(): Unit = { | ||||||||||||||
// Note that JSON uses writer with UTF-8 charset. This string will be written out as UTF-8. | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I meant here: spark/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/CodecStreams.scala, lines 88 to 93 in de36f65.
|
||||||||||||||
gen.writeRaw(lineSeparator) | ||||||||||||||
} | ||||||||||||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -268,6 +268,8 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo | |
* `java.text.SimpleDateFormat`. This applies to timestamp type.</li> | ||
* <li>`multiLine` (default `false`): parse one record, which may span multiple lines, | ||
* per file</li> | ||
* <li>`lineSep` (default covers all `\r`, `\r\n` and `\n`): defines the line separator | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add a test case verifying that the default covers all of `\r`, `\r\n` and `\n`. |
||
* that should be used for parsing.</li> | ||
* </ul> | ||
* | ||
* @since 2.0.0 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -208,9 +208,11 @@ class TextSuite extends QueryTest with SharedSQLContext { | |
} | ||
} | ||
|
||
Seq("|", "^", "::", "!!!@3", 0x1E.toChar.toString).foreach { lineSep => | ||
// scalastyle:off nonascii | ||
Seq("|", "^", "::", "!!!@3", 0x1E.toChar.toString, "아").foreach { lineSep => | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Strictly unrelated, but I just added it. I am fine with reverting this if it bugs anyone. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. BTW, "아" just means "ah"; it has no particular meaning. |
||
testLineSeparator(lineSep) | ||
} | ||
// scalastyle:on nonascii | ||
|
||
private def testFile: String = { | ||
Thread.currentThread().getContextClassLoader.getResource("test-data/text-suite.txt").toString | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can this be private?