From 8aaa9e890acfa318775627fbe6d12eaf34bb35b1 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 4 May 2021 08:44:18 +0900 Subject: [PATCH] [SPARK-35250][SQL][DOCS] Fix duplicated STOP_AT_DELIMITER to SKIP_VALUE at CSV's unescapedQuoteHandling option documentation ### What changes were proposed in this pull request? This is rather a followup of https://github.com/apache/spark/pull/30518 that should be ported back to `branch-3.1` too. `STOP_AT_DELIMITER` was mistakenly used twice. The duplicated `STOP_AT_DELIMITER` should be `SKIP_VALUE` in the documentation. ### Why are the changes needed? To correctly document. ### Does this PR introduce _any_ user-facing change? Yes, it fixes the user-facing documentation. ### How was this patch tested? I checked them via running linters. Closes #32423 from HyukjinKwon/SPARK-35250. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- python/pyspark/sql/readwriter.py | 2 +- python/pyspark/sql/streaming.py | 2 +- .../src/main/scala/org/apache/spark/sql/DataFrameReader.scala | 2 +- .../scala/org/apache/spark/sql/streaming/DataStreamReader.scala | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index c4134b7bf690b..31c1f2f7ca3c5 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -728,7 +728,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non * ``STOP_AT_DELIMITER``: If unescaped quotes are found in the input, consider the value as an unquoted value. This will make the parser accumulate all characters until the delimiter or a line ending is found in the input. - * ``STOP_AT_DELIMITER``: If unescaped quotes are found in the input, the content parsed + * ``SKIP_VALUE``: If unescaped quotes are found in the input, the content parsed for the given value will be skipped and the value set in nullValue will be produced instead. 
* ``RAISE_ERROR``: If unescaped quotes are found in the input, a TextParsingException diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index 8b8484defb600..2c90d7f2dee70 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -941,7 +941,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non * ``STOP_AT_DELIMITER``: If unescaped quotes are found in the input, consider the value as an unquoted value. This will make the parser accumulate all characters until the delimiter or a line ending is found in the input. - * ``STOP_AT_DELIMITER``: If unescaped quotes are found in the input, the content parsed + * ``SKIP_VALUE``: If unescaped quotes are found in the input, the content parsed for the given value will be skipped and the value set in nullValue will be produced instead. * ``RAISE_ERROR``: If unescaped quotes are found in the input, a TextParsingException diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index 9ff37a6f1c69c..f7e1903da6877 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -747,7 +747,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { *
<li>`STOP_AT_DELIMITER`: If unescaped quotes are found in the input, consider the value
 * as an unquoted value. This will make the parser accumulate all characters until the
 * delimiter or a line ending is found in the input.</li>
- * <li>`STOP_AT_DELIMITER`: If unescaped quotes are found in the input, the content parsed
+ * <li>`SKIP_VALUE`: If unescaped quotes are found in the input, the content parsed
 * for the given value will be skipped and the value set in nullValue will be produced
 * instead.</li>
 * <li>`RAISE_ERROR`: If unescaped quotes are found in the input, a TextParsingException
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
index a6913fab97a40..f83269e17b86b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
@@ -412,7 +412,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
 *
<li>`STOP_AT_DELIMITER`: If unescaped quotes are found in the input, consider the value
 * as an unquoted value. This will make the parser accumulate all characters until the
 * delimiter or a line ending is found in the input.</li>
- * <li>`STOP_AT_DELIMITER`: If unescaped quotes are found in the input, the content parsed
+ * <li>`SKIP_VALUE`: If unescaped quotes are found in the input, the content parsed
 * for the given value will be skipped and the value set in nullValue will be produced
 * instead.</li>
 * <li>`RAISE_ERROR`: If unescaped quotes are found in the input, a TextParsingException