Commit 0c5e955

Merge remote-tracking branch 'origin/master' into schema_of_csv-function
2 parents: c038aaa + 4e141a4

17 files changed: +466 -162 lines changed

core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala

Lines changed: 2 additions & 0 deletions
@@ -118,6 +118,8 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {
       case e: HaltReplayException =>
         // Just stop replay.
       case _: EOFException if maybeTruncated =>
+      case _: IOException if maybeTruncated =>
+        logWarning(s"Failed to read Spark event log: $sourceName")
       case ioe: IOException =>
         throw ioe
       case e: Exception =>

core/src/main/scala/org/apache/spark/ui/PagedTable.scala

Lines changed: 2 additions & 2 deletions
@@ -31,7 +31,7 @@ import org.apache.spark.util.Utils
  *
  * @param pageSize the number of rows in a page
  */
-private[ui] abstract class PagedDataSource[T](val pageSize: Int) {
+private[spark] abstract class PagedDataSource[T](val pageSize: Int) {
 
   if (pageSize <= 0) {
     throw new IllegalArgumentException("Page size must be positive")
@@ -72,7 +72,7 @@ private[ui] case class PageData[T](totalPage: Int, data: Seq[T])
 /**
  * A paged table that will generate a HTML table for a specified page and also the page navigation.
  */
-private[ui] trait PagedTable[T] {
+private[spark] trait PagedTable[T] {
 
   def tableId: String

dev/run-tests-jenkins.py

Lines changed: 4 additions & 2 deletions
@@ -39,7 +39,8 @@ def print_err(msg):
 def post_message_to_github(msg, ghprb_pull_id):
     print("Attempting to post to Github...")
 
-    url = "https://api.github.com/repos/apache/spark/issues/" + ghprb_pull_id + "/comments"
+    api_url = os.getenv("GITHUB_API_BASE", "https://api.github.com/repos/apache/spark")
+    url = api_url + "/issues/" + ghprb_pull_id + "/comments"
     github_oauth_key = os.environ["GITHUB_OAUTH_KEY"]
 
     posted_message = json.dumps({"body": msg})
@@ -176,7 +177,8 @@ def main():
     build_display_name = os.environ["BUILD_DISPLAY_NAME"]
     build_url = os.environ["BUILD_URL"]
 
-    commit_url = "https://github.com/apache/spark/commit/" + ghprb_actual_commit
+    project_url = os.getenv("SPARK_PROJECT_URL", "https://github.com/apache/spark")
+    commit_url = project_url + "/commit/" + ghprb_actual_commit
 
     # GitHub doesn't auto-link short hashes when submitted via the API, unfortunately. :(
     short_commit_hash = ghprb_actual_commit[0:7]

docs/building-spark.md

Lines changed: 28 additions & 0 deletions
@@ -260,3 +260,31 @@ For SBT, specify a complete scala version using (e.g. 2.12.6):
     ./build/sbt -Dscala.version=2.12.6
 
 Otherwise, the sbt-pom-reader plugin will use the `scala.version` specified in the spark-parent pom.
+
+## Running Jenkins tests with GitHub Enterprise
+
+To run tests with Jenkins:
+
+    ./dev/run-tests-jenkins
+
+If you use an individual repository or a repository on GitHub Enterprise, export the environment variables below before running the above command.
+
+### Related environment variables
+
+<table class="table">
+  <tr><th>Variable Name</th><th>Default</th><th>Meaning</th></tr>
+  <tr>
+    <td><code>SPARK_PROJECT_URL</code></td>
+    <td>https://github.com/apache/spark</td>
+    <td>
+      The Spark project URL on GitHub Enterprise.
+    </td>
+  </tr>
+  <tr>
+    <td><code>GITHUB_API_BASE</code></td>
+    <td>https://api.github.com/repos/apache/spark</td>
+    <td>
+      The Spark project API server URL on GitHub Enterprise.
+    </td>
+  </tr>
+</table>

sbin/start-master.sh

Lines changed: 2 additions & 1 deletion
@@ -31,7 +31,8 @@ if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
   echo "Usage: ./sbin/start-master.sh [options]"
   pattern="Usage:"
   pattern+="\|Using Spark's default log4j profile:"
-  pattern+="\|Registered signal handlers for"
+  pattern+="\|Started daemon with process name"
+  pattern+="\|Registered signal handler for"
 
   "${SPARK_HOME}"/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
   exit 1

sbin/start-slave.sh

Lines changed: 2 additions & 1 deletion
@@ -43,7 +43,8 @@ if [[ $# -lt 1 ]] || [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
   echo "Usage: ./sbin/start-slave.sh [options] <master>"
   pattern="Usage:"
   pattern+="\|Using Spark's default log4j profile:"
-  pattern+="\|Registered signal handlers for"
+  pattern+="\|Started daemon with process name"
+  pattern+="\|Registered signal handler for"
 
   "${SPARK_HOME}"/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
   exit 1

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVUtilsSuite.scala

Lines changed: 14 additions & 0 deletions
@@ -28,6 +28,7 @@ class CSVUtilsSuite extends SparkFunSuite {
     assert(CSVUtils.toChar("""\"""") === '\"')
     assert(CSVUtils.toChar("""\'""") === '\'')
     assert(CSVUtils.toChar("""\u0000""") === '\u0000')
+    assert(CSVUtils.toChar("""\\""") === '\\')
   }
 
   test("Does not accept delimiter larger than one character") {
@@ -44,4 +45,17 @@
     assert(exception.getMessage.contains("Unsupported special character for delimiter"))
   }
 
+  test("string with one backward slash is prohibited") {
+    val exception = intercept[IllegalArgumentException]{
+      CSVUtils.toChar("""\""")
+    }
+    assert(exception.getMessage.contains("Single backslash is prohibited"))
+  }
+
+  test("output proper error message for empty string") {
+    val exception = intercept[IllegalArgumentException]{
+      CSVUtils.toChar("")
+    }
+    assert(exception.getMessage.contains("Delimiter cannot be empty string"))
+  }
 }

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala

Lines changed: 2 additions & 2 deletions
@@ -23,7 +23,7 @@ import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.errors.TreeNodeException
 import org.apache.spark.sql.catalyst.plans.PlanTestBase
-import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeTestUtils, DateTimeUtils, GenericArrayData, PermissiveMode}
+import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
@@ -510,7 +510,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with
     )
 
     val jsonData2 = """{"t": "2016-01-01T00:00:00"}"""
-    for (tz <- DateTimeTestUtils.ALL_TIMEZONES) {
+    for (tz <- DateTimeTestUtils.outstandingTimezones) {
       c = Calendar.getInstance(tz)
       c.set(2016, 0, 1, 0, 0, 0)
       c.set(Calendar.MILLISECOND, 0)

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala

Lines changed: 10 additions & 0 deletions
@@ -26,6 +26,16 @@ object DateTimeTestUtils {
 
   val ALL_TIMEZONES: Seq[TimeZone] = TimeZone.getAvailableIDs.toSeq.map(TimeZone.getTimeZone)
 
+  val outstandingTimezones: Seq[TimeZone] = Seq(
+    "UTC",
+    "PST",
+    "CET",
+    "Africa/Dakar",
+    "America/Los_Angeles",
+    "Antarctica/Vostok",
+    "Asia/Hong_Kong",
+    "Europe/Amsterdam").map(TimeZone.getTimeZone)
+
   def withDefaultTimeZone[T](newDefaultTimeZone: TimeZone)(block: => T): T = {
     val originalDefaultTimeZone = TimeZone.getDefault
     try {
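
The shorter `outstandingTimezones` list replaces full scans of `ALL_TIMEZONES` in slow tests (see the JsonExpressionsSuite change below). A minimal sketch of how a test might iterate it, with an illustrative assertion only:

```scala
import java.util.TimeZone

import org.apache.spark.sql.catalyst.util.DateTimeTestUtils

// Illustrative only: run a timezone-sensitive check against the representative
// timezones instead of every available timezone ID.
DateTimeTestUtils.outstandingTimezones.foreach { tz =>
  DateTimeTestUtils.withDefaultTimeZone(tz) {
    assert(TimeZone.getDefault.getID == tz.getID)
  }
}
```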

sql/core/src/main/scala/org/apache/spark/sql/Column.scala

Lines changed: 7 additions & 7 deletions
@@ -199,13 +199,13 @@ class Column(val expr: Expression) extends Logging {
   /**
    * Extracts a value or values from a complex type.
    * The following types of extraction are supported:
-   *
-   * - Given an Array, an integer ordinal can be used to retrieve a single value.
-   * - Given a Map, a key of the correct type can be used to retrieve an individual value.
-   * - Given a Struct, a string fieldName can be used to extract that field.
-   * - Given an Array of Structs, a string fieldName can be used to extract filed
-   *   of every struct in that array, and return an Array of fields
-   *
+   * <ul>
+   * <li>Given an Array, an integer ordinal can be used to retrieve a single value.</li>
+   * <li>Given a Map, a key of the correct type can be used to retrieve an individual value.</li>
+   * <li>Given a Struct, a string fieldName can be used to extract that field.</li>
+   * <li>Given an Array of Structs, a string fieldName can be used to extract a field
+   *     of every struct in that array, and return an Array of fields.</li>
+   * </ul>
   * @group expr_ops
   * @since 1.4.0
   */
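
For context, a minimal sketch of the extraction forms listed in the Scaladoc above (the DataFrame, column names, and field names are made up for illustration):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.col

val spark = SparkSession.builder().master("local[*]").appName("column-extraction-sketch").getOrCreate()
import spark.implicits._

// Hypothetical data: an array column, a map column, and a struct column.
val df = Seq((Seq(1, 2, 3), Map("a" -> 1), ("x", 42))).toDF("arr", "m", "s")

df.select(
  col("arr")(0),   // Array: an integer ordinal retrieves a single value
  col("m")("a"),   // Map: a key of the correct type retrieves an individual value
  col("s")("_1")   // Struct: a string field name extracts that field
).show()
```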

sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala

Lines changed: 16 additions & 11 deletions
@@ -47,10 +47,12 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
 
   /**
    * Specifies the behavior when data or table already exists. Options include:
-   * - `SaveMode.Overwrite`: overwrite the existing data.
-   * - `SaveMode.Append`: append the data.
-   * - `SaveMode.Ignore`: ignore the operation (i.e. no-op).
-   * - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime.
+   * <ul>
+   * <li>`SaveMode.Overwrite`: overwrite the existing data.</li>
+   * <li>`SaveMode.Append`: append the data.</li>
+   * <li>`SaveMode.Ignore`: ignore the operation (i.e. no-op).</li>
+   * <li>`SaveMode.ErrorIfExists`: default option, throw an exception at runtime.</li>
+   * </ul>
    *
    * @since 1.4.0
    */
@@ -61,10 +63,12 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
 
   /**
    * Specifies the behavior when data or table already exists. Options include:
-   * - `overwrite`: overwrite the existing data.
-   * - `append`: append the data.
-   * - `ignore`: ignore the operation (i.e. no-op).
-   * - `error` or `errorifexists`: default option, throw an exception at runtime.
+   * <ul>
+   * <li>`overwrite`: overwrite the existing data.</li>
+   * <li>`append`: append the data.</li>
+   * <li>`ignore`: ignore the operation (i.e. no-op).</li>
+   * <li>`error` or `errorifexists`: default option, throw an exception at runtime.</li>
+   * </ul>
    *
    * @since 1.4.0
    */
@@ -163,9 +167,10 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
   * Partitions the output by the given columns on the file system. If specified, the output is
   * laid out on the file system similar to Hive's partitioning scheme. As an example, when we
   * partition a dataset by year and then month, the directory layout would look like:
-  *
-  * - year=2016/month=01/
-  * - year=2016/month=02/
+  * <ul>
+  * <li>year=2016/month=01/</li>
+  * <li>year=2016/month=02/</li>
+  * </ul>
   *
   * Partitioning is one of the most widely used techniques to optimize physical data layout.
   * It provides a coarse-grained index for skipping unnecessary data reads when queries have
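
A hedged usage sketch tying the save modes and the partition layout above together (output path and column names are hypothetical):

```scala
import org.apache.spark.sql.{SaveMode, SparkSession}

val spark = SparkSession.builder().master("local[*]").appName("writer-sketch").getOrCreate()

// Hypothetical dataset with year/month columns.
val events = spark.range(10).selectExpr("id", "2016 AS year", "CAST(id % 12 + 1 AS INT) AS month")

events.write
  .mode(SaveMode.Overwrite)       // or "overwrite" / "append" / "ignore" / "error" / "errorifexists"
  .partitionBy("year", "month")   // yields directories like year=2016/month=1/, year=2016/month=2/, ...
  .parquet("/tmp/events")         // hypothetical output path
```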

sql/core/src/main/scala/org/apache/spark/sql/ForeachWriter.scala

Lines changed: 5 additions & 2 deletions
@@ -130,8 +130,11 @@ abstract class ForeachWriter[T] extends Serializable {
   * Called when stopping to process one partition of new data in the executor side. This is
   * guaranteed to be called either `open` returns `true` or `false`. However,
   * `close` won't be called in the following cases:
-  * - JVM crashes without throwing a `Throwable`
-  * - `open` throws a `Throwable`.
+  *
+  * <ul>
+  * <li>JVM crashes without throwing a `Throwable`</li>
+  * <li>`open` throws a `Throwable`.</li>
+  * </ul>
   *
   * @param errorOrNull the error thrown during processing data or null if there was no error.
   */
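
As a rough illustration of the lifecycle described in that comment, a trivial `ForeachWriter` that only prints rows (not a real sink):

```scala
import org.apache.spark.sql.ForeachWriter

// Illustrative only: shows when open/process/close run; it does no real I/O.
class PrintlnForeachWriter extends ForeachWriter[String] {
  override def open(partitionId: Long, epochId: Long): Boolean = {
    true  // return false to skip this partition; process() is then never called
  }

  override def process(value: String): Unit = println(value)

  override def close(errorOrNull: Throwable): Unit = {
    // Runs after processing, unless the JVM crashed or open() itself threw.
    if (errorOrNull != null) errorOrNull.printStackTrace()
  }
}
```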

sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala

Lines changed: 9 additions & 6 deletions
@@ -30,12 +30,15 @@ import org.apache.spark.sql.catalyst.rules.Rule
  * regarding binary compatibility and source compatibility of methods here.
  *
  * This current provides the following extension points:
- * - Analyzer Rules.
- * - Check Analysis Rules
- * - Optimizer Rules.
- * - Planning Strategies.
- * - Customized Parser.
- * - (External) Catalog listeners.
+ *
+ * <ul>
+ * <li>Analyzer Rules.</li>
+ * <li>Check Analysis Rules.</li>
+ * <li>Optimizer Rules.</li>
+ * <li>Planning Strategies.</li>
+ * <li>Customized Parser.</li>
+ * <li>(External) Catalog listeners.</li>
+ * </ul>
  *
  * The extensions can be used by calling withExtension on the [[SparkSession.Builder]], for
  * example:
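
To make the extension points above concrete, a minimal sketch that injects a do-nothing optimizer rule through the builder (`NoOpRule` is a placeholder, not part of Spark):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule

// Placeholder rule used only to show the injection mechanism.
case class NoOpRule(spark: SparkSession) extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan = plan
}

val spark = SparkSession.builder()
  .master("local[*]")
  .withExtensions { extensions =>
    // Other hooks include injectResolutionRule, injectCheckRule, injectPlannerStrategy, injectParser.
    extensions.injectOptimizerRule(NoOpRule)
  }
  .getOrCreate()
```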

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtils.scala

Lines changed: 19 additions & 17 deletions
@@ -117,23 +117,25 @@ object CSVUtils {
    */
   @throws[IllegalArgumentException]
   def toChar(str: String): Char = {
-    if (str.charAt(0) == '\\') {
-      str.charAt(1)
-      match {
-        case 't' => '\t'
-        case 'r' => '\r'
-        case 'b' => '\b'
-        case 'f' => '\f'
-        case '\"' => '\"' // In case user changes quote char and uses \" as delimiter in options
-        case '\'' => '\''
-        case 'u' if str == """\u0000""" => '\u0000'
-        case _ =>
-          throw new IllegalArgumentException(s"Unsupported special character for delimiter: $str")
-      }
-    } else if (str.length == 1) {
-      str.charAt(0)
-    } else {
-      throw new IllegalArgumentException(s"Delimiter cannot be more than one character: $str")
+    (str: Seq[Char]) match {
+      case Seq() => throw new IllegalArgumentException("Delimiter cannot be empty string")
+      case Seq('\\') => throw new IllegalArgumentException("Single backslash is prohibited." +
+        " It has special meaning as beginning of an escape sequence." +
+        " To get the backslash character, pass a string with two backslashes as the delimiter.")
+      case Seq(c) => c
+      case Seq('\\', 't') => '\t'
+      case Seq('\\', 'r') => '\r'
+      case Seq('\\', 'b') => '\b'
+      case Seq('\\', 'f') => '\f'
+      // In case user changes quote char and uses \" as delimiter in options
+      case Seq('\\', '\"') => '\"'
+      case Seq('\\', '\'') => '\''
+      case Seq('\\', '\\') => '\\'
+      case _ if str == """\u0000""" => '\u0000'
+      case Seq('\\', _) =>
+        throw new IllegalArgumentException(s"Unsupported special character for delimiter: $str")
+      case _ =>
+        throw new IllegalArgumentException(s"Delimiter cannot be more than one character: $str")
     }
   }
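
A hedged sketch of how the new delimiter handling surfaces through the CSV reader options (paths are hypothetical; this assumes the `delimiter`/`sep` option is resolved via `toChar`, as the in-code comment about quote characters suggests):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("csv-delimiter-sketch").getOrCreate()

// "\t" (two characters: backslash, t) is unescaped to a tab delimiter.
val tabSeparated = spark.read.option("delimiter", "\\t").csv("/tmp/data.tsv")

// "\\" (two backslashes) now maps to a literal backslash delimiter.
val backslashSeparated = spark.read.option("delimiter", "\\\\").csv("/tmp/data.csv")

// A single backslash or an empty string now fails fast with a clear message:
// "Single backslash is prohibited. ..." / "Delimiter cannot be empty string"
```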