
Commit be11c4e

Merge branch 'master' into configTimeout-6980

2 parents: 039afed + f04b567

672 files changed: +20691 -10548 lines


.gitignore

Lines changed: 1 addition & 0 deletions
@@ -66,6 +66,7 @@ scalastyle-output.xml
 R-unit-tests.log
 R/unit-tests.out
 python/lib/pyspark.zip
+lint-r-report.log

 # For Hive
 metastore_db/

.rat-excludes

Lines changed: 4 additions & 0 deletions
@@ -28,6 +28,7 @@ spark-env.sh
 spark-env.cmd
 spark-env.sh.template
 log4j-defaults.properties
+log4j-defaults-repl.properties
 bootstrap-tooltip.js
 jquery-1.11.1.min.js
 d3.min.js
@@ -80,6 +81,9 @@ local-1425081759269/*
 local-1426533911241/*
 local-1426633911242/*
 local-1430917381534/*
+local-1430917381535_1
+local-1430917381535_2
 DESCRIPTION
 NAMESPACE
 test_support/*
+.lintr

LICENSE

Lines changed: 1 addition & 0 deletions
@@ -950,3 +950,4 @@ The following components are provided under the MIT License. See project link fo
 (MIT License) scopt (com.github.scopt:scopt_2.10:3.2.0 - https://github.com/scopt/scopt)
 (The MIT License) Mockito (org.mockito:mockito-all:1.8.5 - http://www.mockito.org)
 (MIT License) jquery (https://jquery.org/license/)
+(MIT License) AnchorJS (https://github.com/bryanbraun/anchorjs)

R/create-docs.sh

Lines changed: 4 additions & 4 deletions
@@ -23,14 +23,14 @@
 # After running this script the html docs can be found in
 # $SPARK_HOME/R/pkg/html

+set -o pipefail
+set -e
+
 # Figure out where the script is
 export FWDIR="$(cd "`dirname "$0"`"; pwd)"
 pushd $FWDIR

-# Generate Rd file
-Rscript -e 'library(devtools); devtools::document(pkg="./pkg", roclets=c("rd"))'
-
-# Install the package
+# Install the package (this will also generate the Rd files)
 ./install-dev.sh

 # Now create HTML files

R/install-dev.sh

Lines changed: 10 additions & 1 deletion
@@ -26,11 +26,20 @@
 # NOTE(shivaram): Right now we use $SPARK_HOME/R/lib to be the installation directory
 # to load the SparkR package on the worker nodes.

+set -o pipefail
+set -e

 FWDIR="$(cd `dirname $0`; pwd)"
 LIB_DIR="$FWDIR/lib"

 mkdir -p $LIB_DIR

-# Install R
+pushd $FWDIR
+
+# Generate Rd files if devtools is installed
+Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'
+
+# Install SparkR to $LIB_DIR
 R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/
+
+popd
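
The added Rscript one-liner runs roxygen only when devtools is present, so a bare R installation can still install SparkR. A minimal standalone sketch of the same guard, runnable in a plain R session (requireNamespace is a common alternative to scanning installed.packages(), not what the script itself uses):

# Generate Rd files only when devtools is available; otherwise skip quietly.
if (requireNamespace("devtools", quietly = TRUE)) {
  devtools::document(pkg = "./pkg", roclets = c("rd"))
} else {
  message("devtools not installed; skipping Rd generation")
}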

R/log4j.properties

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@
 log4j.rootCategory=INFO, file
 log4j.appender.file=org.apache.log4j.FileAppender
 log4j.appender.file.append=true
-log4j.appender.file.file=R-unit-tests.log
+log4j.appender.file.file=R/target/unit-tests.log
 log4j.appender.file.layout=org.apache.log4j.PatternLayout
 log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n

R/pkg/.lintr

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+linters: with_defaults(line_length_linter(100), camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE))
+exclusions: list("inst/profile/general.R" = 1, "inst/profile/shell.R")
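
lintr reads this .lintr file automatically when the package is linted. A sketch of invoking it from an R session, assuming the lintr package is installed:

# Lint the SparkR package; lintr picks up R/pkg/.lintr for the
# linter set and the exclusions configured above.
library(lintr)
lint_package("R/pkg")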

R/pkg/NAMESPACE

Lines changed: 5 additions & 0 deletions
@@ -10,6 +10,11 @@ export("sparkR.init")
 export("sparkR.stop")
 export("print.jobj")

+# Job group lifecycle management methods
+export("setJobGroup",
+       "clearJobGroup",
+       "cancelJobGroup")
+
 exportClasses("DataFrame")

 exportMethods("arrange",

R/pkg/R/DataFrame.R

Lines changed: 48 additions & 48 deletions
@@ -38,7 +38,7 @@ setClass("DataFrame",
 setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) {
   .Object@env <- new.env()
   .Object@env$isCached <- isCached
-
+
   .Object@sdf <- sdf
   .Object
 })
@@ -55,11 +55,11 @@ dataFrame <- function(sdf, isCached = FALSE) {
 ############################ DataFrame Methods ##############################################

 #' Print Schema of a DataFrame
-#'
+#'
 #' Prints out the schema in tree format
-#'
+#'
 #' @param x A SparkSQL DataFrame
-#'
+#'
 #' @rdname printSchema
 #' @export
 #' @examples
@@ -78,11 +78,11 @@ setMethod("printSchema",
 })

 #' Get schema object
-#'
+#'
 #' Returns the schema of this DataFrame as a structType object.
-#'
+#'
 #' @param x A SparkSQL DataFrame
-#'
+#'
 #' @rdname schema
 #' @export
 #' @examples
@@ -100,9 +100,9 @@ setMethod("schema",
 })

 #' Explain
-#'
+#'
 #' Print the logical and physical Catalyst plans to the console for debugging.
-#'
+#'
 #' @param x A SparkSQL DataFrame
 #' @param extended Logical. If extended is False, explain() only prints the physical plan.
 #' @rdname explain
@@ -200,11 +200,11 @@ setMethod("show", "DataFrame",
 })

 #' DataTypes
-#'
+#'
 #' Return all column names and their data types as a list
-#'
+#'
 #' @param x A SparkSQL DataFrame
-#'
+#'
 #' @rdname dtypes
 #' @export
 #' @examples
@@ -224,11 +224,11 @@ setMethod("dtypes",
 })

 #' Column names
-#'
+#'
 #' Return all column names as a list
-#'
+#'
 #' @param x A SparkSQL DataFrame
-#'
+#'
 #' @rdname columns
 #' @export
 #' @examples
@@ -256,12 +256,12 @@ setMethod("names",
 })

 #' Register Temporary Table
-#'
+#'
 #' Registers a DataFrame as a Temporary Table in the SQLContext
-#'
+#'
 #' @param x A SparkSQL DataFrame
 #' @param tableName A character vector containing the name of the table
-#'
+#'
 #' @rdname registerTempTable
 #' @export
 #' @examples
@@ -306,11 +306,11 @@ setMethod("insertInto",
 })

 #' Cache
-#'
+#'
 #' Persist with the default storage level (MEMORY_ONLY).
-#'
+#'
 #' @param x A SparkSQL DataFrame
-#'
+#'
 #' @rdname cache-methods
 #' @export
 #' @examples
@@ -400,7 +400,7 @@ setMethod("repartition",
           signature(x = "DataFrame", numPartitions = "numeric"),
           function(x, numPartitions) {
             sdf <- callJMethod(x@sdf, "repartition", numToInt(numPartitions))
-            dataFrame(sdf)
+            dataFrame(sdf)
           })

 # toJSON
@@ -489,7 +489,7 @@ setMethod("distinct",
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
 #' df <- jsonFile(sqlContext, path)
-#' collect(sample(df, FALSE, 0.5))
+#' collect(sample(df, FALSE, 0.5))
 #' collect(sample(df, TRUE, 0.5))
 #'}
 setMethod("sample",
@@ -513,11 +513,11 @@ setMethod("sample_frac",
 })

 #' Count
-#'
+#'
 #' Returns the number of rows in a DataFrame
-#'
+#'
 #' @param x A SparkSQL DataFrame
-#'
+#'
 #' @rdname count
 #' @export
 #' @examples
@@ -568,13 +568,13 @@ setMethod("collect",
 })

 #' Limit
-#'
+#'
 #' Limit the resulting DataFrame to the number of rows specified.
-#'
+#'
 #' @param x A SparkSQL DataFrame
 #' @param num The number of rows to return
 #' @return A new DataFrame containing the number of rows specified.
-#'
+#'
 #' @rdname limit
 #' @export
 #' @examples
@@ -593,7 +593,7 @@ setMethod("limit",
 })

 #' Take the first NUM rows of a DataFrame and return a the results as a data.frame
-#'
+#'
 #' @rdname take
 #' @export
 #' @examples
@@ -613,8 +613,8 @@ setMethod("take",

 #' Head
 #'
-#' Return the first NUM rows of a DataFrame as a data.frame. If NUM is NULL,
-#' then head() returns the first 6 rows in keeping with the current data.frame
+#' Return the first NUM rows of a DataFrame as a data.frame. If NUM is NULL,
+#' then head() returns the first 6 rows in keeping with the current data.frame
 #' convention in R.
 #'
 #' @param x A SparkSQL DataFrame
@@ -659,11 +659,11 @@ setMethod("first",
 })

 # toRDD()
-#
+#
 # Converts a Spark DataFrame to an RDD while preserving column names.
-#
+#
 # @param x A Spark DataFrame
-#
+#
 # @rdname DataFrame
 # @export
 # @examples
@@ -1167,7 +1167,7 @@ setMethod("where",
 #'
 #' @param x A Spark DataFrame
 #' @param y A Spark DataFrame
-#' @param joinExpr (Optional) The expression used to perform the join. joinExpr must be a
+#' @param joinExpr (Optional) The expression used to perform the join. joinExpr must be a
 #'   Column expression. If joinExpr is omitted, join() wil perform a Cartesian join
 #' @param joinType The type of join to perform. The following join types are available:
 #' 'inner', 'outer', 'left_outer', 'right_outer', 'semijoin'. The default joinType is "inner".
@@ -1303,7 +1303,7 @@ setMethod("except",
 #' @param source A name for external data source
 #' @param mode One of 'append', 'overwrite', 'error', 'ignore'
 #'
-#' @rdname write.df
+#' @rdname write.df
 #' @export
 #' @examples
 #'\dontrun{
@@ -1401,7 +1401,7 @@ setMethod("saveAsTable",
 #' @param col A string of name
 #' @param ... Additional expressions
 #' @return A DataFrame
-#' @rdname describe
+#' @rdname describe
 #' @export
 #' @examples
 #'\dontrun{
@@ -1444,7 +1444,7 @@ setMethod("describe",
 #'   This overwrites the how parameter.
 #' @param cols Optional list of column names to consider.
 #' @return A DataFrame
-#'
+#'
 #' @rdname nafunctions
 #' @export
 #' @examples
@@ -1465,7 +1465,7 @@ setMethod("dropna",
             if (is.null(minNonNulls)) {
              minNonNulls <- if (how == "any") { length(cols) } else { 1 }
             }
-
+
             naFunctions <- callJMethod(x@sdf, "na")
             sdf <- callJMethod(naFunctions, "drop",
                                as.integer(minNonNulls), listToSeq(as.list(cols)))
@@ -1488,16 +1488,16 @@ setMethod("na.omit",
 #' @param value Value to replace null values with.
 #'   Should be an integer, numeric, character or named list.
 #'   If the value is a named list, then cols is ignored and
-#'   value must be a mapping from column name (character) to
+#'   value must be a mapping from column name (character) to
 #'   replacement value. The replacement value must be an
 #'   integer, numeric or character.
 #' @param cols optional list of column names to consider.
 #'   Columns specified in cols that do not have matching data
-#'   type are ignored. For example, if value is a character, and
+#'   type are ignored. For example, if value is a character, and
 #'   subset contains a non-character column, then the non-character
 #'   column is simply ignored.
 #' @return A DataFrame
-#'
+#'
 #' @rdname nafunctions
 #' @export
 #' @examples
@@ -1515,14 +1515,14 @@ setMethod("fillna",
             if (!(class(value) %in% c("integer", "numeric", "character", "list"))) {
               stop("value should be an integer, numeric, charactor or named list.")
             }
-
+
             if (class(value) == "list") {
               # Check column names in the named list
               colNames <- names(value)
               if (length(colNames) == 0 || !all(colNames != "")) {
                 stop("value should be an a named list with each name being a column name.")
               }
-
+
               # Convert to the named list to an environment to be passed to JVM
               valueMap <- new.env()
               for (col in colNames) {
@@ -1533,19 +1533,19 @@ setMethod("fillna",
               }
               valueMap[[col]] <- v
             }
-
+
             # When value is a named list, caller is expected not to pass in cols
             if (!is.null(cols)) {
              warning("When value is a named list, cols is ignored!")
              cols <- NULL
             }
-
+
            value <- valueMap
           } else if (is.integer(value)) {
            # Cast an integer to a numeric
            value <- as.numeric(value)
           }
-
+
           naFunctions <- callJMethod(x@sdf, "na")
           sdf <- if (length(cols) == 0) {
             callJMethod(naFunctions, "fill", value)
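
For reference, the na functions whose docs and bodies are touched above are used like this; a hedged sketch against the SparkR API of this era (df and the column names are illustrative):

# Drop rows that have any NA among the listed columns (how = "any")
df2 <- dropna(df, how = "any", cols = c("age", "height"))

# Named-list form of fillna: cols is ignored, and integer replacement
# values are cast to numeric internally, as the diff shows
df3 <- fillna(df, value = list(age = 20L, name = "unknown"))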
