Commit b58f2e6

Author: Andrew Or

Merge branch 'master' of github.com:apache/spark into task-metrics-to-accums

Conflicts:
    core/src/test/scala/org/apache/spark/storage/StorageStatusListenerSuite.scala
    core/src/test/scala/org/apache/spark/ui/storage/StorageTabSuite.scala
    core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala

2 parents: 06b958e + ad1503f

File tree: 107 files changed, +2228 -1511 lines


.rat-excludes

Lines changed: 2 additions & 0 deletions
@@ -86,3 +86,5 @@ org.apache.spark.scheduler.SparkHistoryListenerFactory
 .*parquet
 LZ4BlockInputStream.java
 spark-deps-.*
+.*csv
+.*tsv

NOTICE

Lines changed: 37 additions & 1 deletion
@@ -610,7 +610,43 @@ Vis.js uses and redistributes the following third-party libraries:
 
 ===============================================================================
 
-The CSS style for the navigation sidebar of the documentation was originally
+The CSS style for the navigation sidebar of the documentation was originally
 submitted by Óscar Nájera for the scikit-learn project. The scikit-learn project
 is distributed under the 3-Clause BSD license.
 ===============================================================================
+
+For CSV functionality:
+
+/*
+ * Copyright 2014 Databricks
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright 2015 Ayasdi Inc
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+

R/pkg/NAMESPACE

Lines changed: 1 addition & 0 deletions
@@ -278,6 +278,7 @@ export("as.DataFrame",
        "read.parquet",
        "read.text",
        "sql",
+       "str",
        "table",
        "tableNames",
        "tables",

R/pkg/R/DataFrame.R

Lines changed: 73 additions & 0 deletions
@@ -2299,3 +2299,76 @@ setMethod("with",
             newEnv <- assignNewEnv(data)
             eval(substitute(expr), envir = newEnv, enclos = newEnv)
           })
+
+#' Display the structure of a DataFrame, including column names, column types,
+#' as well as a small sample of rows.
+#' @name str
+#' @title Compactly display the structure of a dataset
+#' @rdname str
+#' @family DataFrame functions
+#' @param object a DataFrame
+#' @examples \dontrun{
+#' # Create a DataFrame from the Iris dataset
+#' irisDF <- createDataFrame(sqlContext, iris)
+#'
+#' # Show the structure of the DataFrame
+#' str(irisDF)
+#' }
+setMethod("str",
+          signature(object = "DataFrame"),
+          function(object) {
+
+            # TODO: These could be made global parameters, though in R it's not the case
+            MAX_CHAR_PER_ROW <- 120
+            MAX_COLS <- 100
+
+            # Get the column names and types of the DataFrame
+            names <- names(object)
+            types <- coltypes(object)
+
+            # Get the first elements of the dataset. Limit number of columns accordingly
+            localDF <- if (ncol(object) > MAX_COLS) {
+              head(object[, c(1:MAX_COLS)])
+            } else {
+              head(object)
+            }
+
+            # The number of observations will not be displayed as computing the
+            # number of rows is a very expensive operation
+            cat(paste0("'", class(object), "': ", length(names), " variables:\n"))
+
+            if (nrow(localDF) > 0) {
+              for (i in 1 : ncol(localDF)) {
+                # Get the first elements for each column
+
+                firstElements <- if (types[i] == "character") {
+                  paste(paste0("\"", localDF[, i], "\""), collapse = " ")
+                } else {
+                  paste(localDF[, i], collapse = " ")
+                }
+
+                # Add the corresponding number of spaces for alignment
+                spaces <- paste(rep(" ", max(nchar(names) - nchar(names[i]))), collapse = "")
+
+                # Get the short type. For 'character', it would be 'chr';
+                # for 'numeric', it's 'num', etc.
+                dataType <- SHORT_TYPES[[types[i]]]
+                if (is.null(dataType)) {
+                  dataType <- substring(types[i], 1, 3)
+                }
+
+                # Concatenate the colnames, coltypes, and first
+                # elements of each column
+                line <- paste0(" $ ", names[i], spaces, ": ",
+                               dataType, " ", firstElements)
+
+                # Chop off extra characters if this is too long
+                cat(substr(line, 1, MAX_CHAR_PER_ROW))
+                cat("\n")
+              }
+
+              if (ncol(localDF) < ncol(object)) {
+                cat(paste0("\nDisplaying first ", ncol(localDF), " columns only."))
+              }
+            }
+          })
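To make the new output format concrete, here is a usage sketch (not part of the diff) that mirrors the expectations in the test suite further down; it assumes a SparkR session with an initialized sqlContext:

    iris2 <- iris
    colnames(iris2) <- c("Sepal_Length", "Sepal_Width", "Petal_Length", "Petal_Width", "Species")
    irisDF <- createDataFrame(sqlContext, iris2)
    str(irisDF)
    # 'DataFrame': 5 variables:
    #  $ Sepal_Length: num 5.1 4.9 4.7 4.6 5 5.4
    #  $ Sepal_Width : num 3.5 3 3.2 3.1 3.6 3.9
    #  $ Petal_Length: num 1.4 1.4 1.3 1.5 1.4 1.7
    #  $ Petal_Width : num 0.2 0.2 0.2 0.2 0.2 0.4
    #  $ Species     : chr "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"

Only the rows returned by head() are sampled and each line is capped at MAX_CHAR_PER_ROW characters; the row count is deliberately never printed, since computing it would trigger a full Spark job.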

R/pkg/R/generics.R

Lines changed: 18 additions & 18 deletions
@@ -378,7 +378,6 @@ setGeneric("subtractByKey",
 setGeneric("value", function(bcast) { standardGeneric("value") })
 
 
-
 #################### DataFrame Methods ########################
 
 #' @rdname agg
@@ -389,6 +388,14 @@ setGeneric("agg", function (x, ...) { standardGeneric("agg") })
 #' @export
 setGeneric("arrange", function(x, col, ...) { standardGeneric("arrange") })
 
+#' @rdname as.data.frame
+#' @export
+setGeneric("as.data.frame")
+
+#' @rdname attach
+#' @export
+setGeneric("attach")
+
 #' @rdname columns
 #' @export
 setGeneric("colnames", function(x, do.NULL = TRUE, prefix = "col") { standardGeneric("colnames") })
@@ -525,13 +532,12 @@ setGeneric("saveAsTable", function(df, tableName, source, mode, ...) {
   standardGeneric("saveAsTable")
 })
 
-#' @rdname withColumn
 #' @export
-setGeneric("transform", function(`_data`, ...) {standardGeneric("transform") })
+setGeneric("str")
 
-#' @rdname write.df
+#' @rdname mutate
 #' @export
-setGeneric("write.df", function(df, path, ...) { standardGeneric("write.df") })
+setGeneric("transform", function(`_data`, ...) {standardGeneric("transform") })
 
 #' @rdname write.df
 #' @export
@@ -593,6 +599,10 @@ setGeneric("unionAll", function(x, y) { standardGeneric("unionAll") })
 #' @export
 setGeneric("where", function(x, condition) { standardGeneric("where") })
 
+#' @rdname with
+#' @export
+setGeneric("with")
+
 #' @rdname withColumn
 #' @export
 setGeneric("withColumn", function(x, colName, col) { standardGeneric("withColumn") })
@@ -602,6 +612,9 @@ setGeneric("withColumn", function(x, colName, col) { standardGeneric("withColumn
 setGeneric("withColumnRenamed",
            function(x, existingCol, newCol) { standardGeneric("withColumnRenamed") })
 
+#' @rdname write.df
+#' @export
+setGeneric("write.df", function(df, path, ...) { standardGeneric("write.df") })
 
 ###################### Column Methods ##########################
 
@@ -1109,7 +1122,6 @@ setGeneric("weekofyear", function(x) { standardGeneric("weekofyear") })
 #' @export
 setGeneric("year", function(x) { standardGeneric("year") })
 
-
 #' @rdname glm
 #' @export
 setGeneric("glm")
@@ -1121,15 +1133,3 @@ setGeneric("predict", function(object, ...) { standardGeneric("predict") })
 #' @rdname rbind
 #' @export
 setGeneric("rbind", signature = "...")
-
-#' @rdname as.data.frame
-#' @export
-setGeneric("as.data.frame")
-
-#' @rdname attach
-#' @export
-setGeneric("attach")
-
-#' @rdname with
-#' @export
-setGeneric("with")

R/pkg/R/types.R

Lines changed: 17 additions & 4 deletions
@@ -47,10 +47,23 @@ COMPLEX_TYPES <- list(
 # The full list of data types.
 DATA_TYPES <- as.environment(c(as.list(PRIMITIVE_TYPES), COMPLEX_TYPES))
 
+SHORT_TYPES <- as.environment(list(
+  "character" = "chr",
+  "logical" = "logi",
+  "POSIXct" = "POSIXct",
+  "integer" = "int",
+  "numeric" = "num",
+  "raw" = "raw",
+  "Date" = "Date",
+  "map" = "map",
+  "array" = "array",
+  "struct" = "struct"
+))
+
 # An environment for mapping R to Scala, names are R types and values are Scala types.
 rToSQLTypes <- as.environment(list(
-  "integer" = "integer", # in R, integer is 32bit
-  "numeric" = "double", # in R, numeric == double which is 64bit
-  "double" = "double",
+  "integer"   = "integer", # in R, integer is 32bit
+  "numeric"   = "double",  # in R, numeric == double which is 64bit
+  "double"    = "double",
   "character" = "string",
-  "logical" = "boolean"))
+  "logical"   = "boolean"))

R/pkg/inst/tests/testthat/test_sparkSQL.R

Lines changed: 31 additions & 0 deletions
@@ -1799,6 +1799,37 @@ test_that("Method coltypes() to get and set R's data types of a DataFrame", {
                "Only atomic type is supported for column types")
 })
 
+test_that("Method str()", {
+  # Structure of Iris
+  iris2 <- iris
+  colnames(iris2) <- c("Sepal_Length", "Sepal_Width", "Petal_Length", "Petal_Width", "Species")
+  iris2$col <- TRUE
+  irisDF2 <- createDataFrame(sqlContext, iris2)
+
+  out <- capture.output(str(irisDF2))
+  expect_equal(length(out), 7)
+  expect_equal(out[1], "'DataFrame': 6 variables:")
+  expect_equal(out[2], " $ Sepal_Length: num 5.1 4.9 4.7 4.6 5 5.4")
+  expect_equal(out[3], " $ Sepal_Width : num 3.5 3 3.2 3.1 3.6 3.9")
+  expect_equal(out[4], " $ Petal_Length: num 1.4 1.4 1.3 1.5 1.4 1.7")
+  expect_equal(out[5], " $ Petal_Width : num 0.2 0.2 0.2 0.2 0.2 0.4")
+  expect_equal(out[6], paste0(" $ Species     : chr \"setosa\" \"setosa\" \"",
+                              "setosa\" \"setosa\" \"setosa\" \"setosa\""))
+  expect_equal(out[7], " $ col         : logi TRUE TRUE TRUE TRUE TRUE TRUE")
+
+  # A random dataset with many columns. This test is to check str limits
+  # the number of columns. Therefore, it will suffice to check for the
+  # number of returned rows
+  x <- runif(200, 1, 10)
+  df <- data.frame(t(as.matrix(data.frame(x, x, x, x, x, x, x, x, x))))
+  DF <- createDataFrame(sqlContext, df)
+  out <- capture.output(str(DF))
+  expect_equal(length(out), 103)
+
+  # Test utils:::str
+  expect_equal(capture.output(utils:::str(iris)), capture.output(str(iris)))
+})
+
 unlink(parquetPath)
 unlink(jsonPath)
 unlink(jsonPathNa)
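A reading note on the expected 103 (not part of the commit): data.frame(x, x, ..., x) with nine copies of a 200-element vector is 200 rows by 9 columns, so its transpose yields a 200-column data frame; str() caps output at MAX_COLS = 100, which gives 1 header line + 100 column lines + a 2-line truncation notice (the leading "\n" in cat("\nDisplaying ...") produces one empty captured line), i.e. 103 lines in total.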

checkstyle.xml

Lines changed: 7 additions & 1 deletion
@@ -58,6 +58,12 @@
         <property name="eachLine" value="true"/>
     </module>
 
+    <module name="RegexpSingleline">
+        <!-- \s matches whitespace character, $ matches end of line. -->
+        <property name="format" value="\s+$"/>
+        <property name="message" value="No trailing whitespace allowed."/>
+    </module>
+
     <module name="TreeWalker">
         <module name="OuterTypeFilename"/>
         <module name="IllegalTokenText">
@@ -84,7 +90,7 @@
         </module>
         <module name="NeedBraces">
             <property name="allowSingleLineStatement" value="true"/>
-        </module>
+        </module>
         <module name="OneStatementPerLine"/>
         <module name="ArrayTypeStyle"/>
         <module name="FallThrough"/>

core/pom.xml

Lines changed: 0 additions & 27 deletions
@@ -267,33 +267,6 @@
       <artifactId>oro</artifactId>
       <version>${oro.version}</version>
     </dependency>
-    <dependency>
-      <groupId>org.tachyonproject</groupId>
-      <artifactId>tachyon-client</artifactId>
-      <version>0.8.2</version>
-      <exclusions>
-        <exclusion>
-          <groupId>org.apache.hadoop</groupId>
-          <artifactId>hadoop-client</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>org.apache.curator</groupId>
-          <artifactId>curator-client</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>org.apache.curator</groupId>
-          <artifactId>curator-framework</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>org.apache.curator</groupId>
-          <artifactId>curator-recipes</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>org.tachyonproject</groupId>
-          <artifactId>tachyon-underfs-glusterfs</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
     <dependency>
       <groupId>org.seleniumhq.selenium</groupId>
       <artifactId>selenium-java</artifactId>

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 0 additions & 6 deletions
@@ -243,10 +243,6 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
   private[spark] def eventLogDir: Option[URI] = _eventLogDir
   private[spark] def eventLogCodec: Option[String] = _eventLogCodec
 
-  // Generate the random name for a temp folder in external block store.
-  // Add a timestamp as the suffix here to make it more safe
-  val externalBlockStoreFolderName = "spark-" + randomUUID.toString()
-
   def isLocal: Boolean = (master == "local" || master.startsWith("local["))
 
   /**
@@ -423,8 +419,6 @@
     }
   }
 
-  _conf.set("spark.externalBlockStore.folderName", externalBlockStoreFolderName)
-
   if (master == "yarn-client") System.setProperty("SPARK_YARN_MODE", "true")
 
   // "_jobProgressListener" should be set up before creating SparkEnv because when creating
