Skip to content

Commit 70951d6

Browse files
committed
Merge branch 'master' into leveldb_external_shuffle_service_NM_restart
2 parents 5c71c8c + f10660f commit 70951d6

File tree

329 files changed

+5336
-2775
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

329 files changed

+5336
-2775
lines changed

R/install-dev.bat

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,3 @@ set SPARK_HOME=%~dp0..
2525
MKDIR %SPARK_HOME%\R\lib
2626

2727
R.exe CMD INSTALL --library="%SPARK_HOME%\R\lib" %SPARK_HOME%\R\pkg\
28-
29-
rem Zip the SparkR package so that it can be distributed to worker nodes on YARN
30-
pushd %SPARK_HOME%\R\lib
31-
%JAVA_HOME%\bin\jar.exe cfM "%SPARK_HOME%\R\lib\sparkr.zip" SparkR
32-
popd

R/pkg/DESCRIPTION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ Collate:
2929
'client.R'
3030
'context.R'
3131
'deserialize.R'
32+
'functions.R'
3233
'mllib.R'
3334
'serialize.R'
3435
'sparkR.R'

R/pkg/NAMESPACE

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ exportMethods("abs",
9898
"contains",
9999
"cos",
100100
"cosh",
101+
"concat",
101102
"countDistinct",
102103
"desc",
103104
"endsWith",
@@ -106,10 +107,13 @@ exportMethods("abs",
106107
"floor",
107108
"getField",
108109
"getItem",
110+
"greatest",
109111
"hypot",
110112
"isNotNull",
111113
"isNull",
114+
"lit",
112115
"last",
116+
"least",
113117
"like",
114118
"log",
115119
"log10",

R/pkg/R/column.R

Lines changed: 0 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,6 @@ operators <- list(
6060
)
6161
column_functions1 <- c("asc", "desc", "isNull", "isNotNull")
6262
column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains")
63-
functions <- c("min", "max", "sum", "avg", "mean", "count", "abs", "sqrt",
64-
"first", "last", "lower", "upper", "sumDistinct",
65-
"acos", "asin", "atan", "cbrt", "ceiling", "cos", "cosh", "exp",
66-
"expm1", "floor", "log", "log10", "log1p", "rint", "sign",
67-
"sin", "sinh", "tan", "tanh", "toDegrees", "toRadians")
68-
binary_mathfunctions <- c("atan2", "hypot")
6963

7064
createOperator <- function(op) {
7165
setMethod(op,
@@ -111,33 +105,6 @@ createColumnFunction2 <- function(name) {
111105
})
112106
}
113107

114-
createStaticFunction <- function(name) {
115-
setMethod(name,
116-
signature(x = "Column"),
117-
function(x) {
118-
if (name == "ceiling") {
119-
name <- "ceil"
120-
}
121-
if (name == "sign") {
122-
name <- "signum"
123-
}
124-
jc <- callJStatic("org.apache.spark.sql.functions", name, x@jc)
125-
column(jc)
126-
})
127-
}
128-
129-
createBinaryMathfunctions <- function(name) {
130-
setMethod(name,
131-
signature(y = "Column"),
132-
function(y, x) {
133-
if (class(x) == "Column") {
134-
x <- x@jc
135-
}
136-
jc <- callJStatic("org.apache.spark.sql.functions", name, y@jc, x)
137-
column(jc)
138-
})
139-
}
140-
141108
createMethods <- function() {
142109
for (op in names(operators)) {
143110
createOperator(op)
@@ -148,12 +115,6 @@ createMethods <- function() {
148115
for (name in column_functions2) {
149116
createColumnFunction2(name)
150117
}
151-
for (x in functions) {
152-
createStaticFunction(x)
153-
}
154-
for (name in binary_mathfunctions) {
155-
createBinaryMathfunctions(name)
156-
}
157118
}
158119

159120
createMethods()
@@ -242,45 +203,3 @@ setMethod("%in%",
242203
jc <- callJMethod(x@jc, "in", table)
243204
return(column(jc))
244205
})
245-
246-
#' Approx Count Distinct
247-
#'
248-
#' @rdname column
249-
#' @return the approximate number of distinct items in a group.
250-
setMethod("approxCountDistinct",
251-
signature(x = "Column"),
252-
function(x, rsd = 0.95) {
253-
jc <- callJStatic("org.apache.spark.sql.functions", "approxCountDistinct", x@jc, rsd)
254-
column(jc)
255-
})
256-
257-
#' Count Distinct
258-
#'
259-
#' @rdname column
260-
#' @return the number of distinct items in a group.
261-
setMethod("countDistinct",
262-
signature(x = "Column"),
263-
function(x, ...) {
264-
jcol <- lapply(list(...), function (x) {
265-
x@jc
266-
})
267-
jc <- callJStatic("org.apache.spark.sql.functions", "countDistinct", x@jc,
268-
listToSeq(jcol))
269-
column(jc)
270-
})
271-
272-
#' @rdname column
273-
#' @aliases countDistinct
274-
setMethod("n_distinct",
275-
signature(x = "Column"),
276-
function(x, ...) {
277-
countDistinct(x, ...)
278-
})
279-
280-
#' @rdname column
281-
#' @aliases count
282-
setMethod("n",
283-
signature(x = "Column"),
284-
function(x) {
285-
count(x)
286-
})

R/pkg/R/deserialize.R

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -176,10 +176,14 @@ readRow <- function(inputCon) {
176176

177177
# Take a single column as Array[Byte] and deserialize it into an atomic vector
178178
readCol <- function(inputCon, numRows) {
179-
# sapply can not work with POSIXlt
180-
do.call(c, lapply(1:numRows, function(x) {
181-
value <- readObject(inputCon)
182-
# Replace NULL with NA so we can coerce to vectors
183-
if (is.null(value)) NA else value
184-
}))
179+
if (numRows > 0) {
180+
# sapply can not work with POSIXlt
181+
do.call(c, lapply(1:numRows, function(x) {
182+
value <- readObject(inputCon)
183+
# Replace NULL with NA so we can coerce to vectors
184+
if (is.null(value)) NA else value
185+
}))
186+
} else {
187+
vector()
188+
}
185189
}

R/pkg/R/functions.R

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
#' @include generics.R column.R
19+
NULL
20+
21+
#' @title S4 expression functions for DataFrame column(s)
22+
#' @description These are expression functions on DataFrame columns
23+
24+
functions1 <- c(
25+
"abs", "acos", "approxCountDistinct", "ascii", "asin", "atan",
26+
"avg", "base64", "bin", "bitwiseNOT", "cbrt", "ceil", "cos", "cosh", "count",
27+
"crc32", "dayofmonth", "dayofyear", "exp", "explode", "expm1", "factorial",
28+
"first", "floor", "hex", "hour", "initcap", "isNaN", "last", "last_day",
29+
"length", "log", "log10", "log1p", "log2", "lower", "ltrim", "max", "md5",
30+
"mean", "min", "minute", "month", "negate", "quarter", "reverse",
31+
"rint", "round", "rtrim", "second", "sha1", "signum", "sin", "sinh", "size",
32+
"soundex", "sqrt", "sum", "sumDistinct", "tan", "tanh", "toDegrees",
33+
"toRadians", "to_date", "trim", "unbase64", "unhex", "upper", "weekofyear",
34+
"year")
35+
functions2 <- c(
36+
"atan2", "datediff", "hypot", "levenshtein", "months_between", "nanvl", "pmod")
37+
38+
createFunction1 <- function(name) {
39+
setMethod(name,
40+
signature(x = "Column"),
41+
function(x) {
42+
jc <- callJStatic("org.apache.spark.sql.functions", name, x@jc)
43+
column(jc)
44+
})
45+
}
46+
47+
createFunction2 <- function(name) {
48+
setMethod(name,
49+
signature(y = "Column"),
50+
function(y, x) {
51+
if (class(x) == "Column") {
52+
x <- x@jc
53+
}
54+
jc <- callJStatic("org.apache.spark.sql.functions", name, y@jc, x)
55+
column(jc)
56+
})
57+
}
58+
59+
createFunctions <- function() {
60+
for (name in functions1) {
61+
createFunction1(name)
62+
}
63+
for (name in functions2) {
64+
createFunction2(name)
65+
}
66+
}
67+
68+
createFunctions()
69+
70+
#' @rdname functions
71+
#' @return Creates a Column class of literal value.
72+
setMethod("lit", signature("ANY"),
73+
function(x) {
74+
jc <- callJStatic("org.apache.spark.sql.functions", "lit", ifelse(class(x) == "Column", x@jc, x))
75+
column(jc)
76+
})
77+
78+
#' Approx Count Distinct
79+
#'
80+
#' @rdname functions
81+
#' @return the approximate number of distinct items in a group.
82+
setMethod("approxCountDistinct",
83+
signature(x = "Column"),
84+
function(x, rsd = 0.95) {
85+
jc <- callJStatic("org.apache.spark.sql.functions", "approxCountDistinct", x@jc, rsd)
86+
column(jc)
87+
})
88+
89+
#' Count Distinct
90+
#'
91+
#' @rdname functions
92+
#' @return the number of distinct items in a group.
93+
setMethod("countDistinct",
94+
signature(x = "Column"),
95+
function(x, ...) {
96+
jcol <- lapply(list(...), function (x) {
97+
x@jc
98+
})
99+
jc <- callJStatic("org.apache.spark.sql.functions", "countDistinct", x@jc,
100+
listToSeq(jcol))
101+
column(jc)
102+
})
103+
104+
#' @rdname functions
105+
#' @return Concatenates multiple input string columns together into a single string column.
106+
setMethod("concat",
107+
signature(x = "Column"),
108+
function(x, ...) {
109+
jcols <- lapply(list(x, ...), function(x) { x@jc })
110+
jc <- callJStatic("org.apache.spark.sql.functions", "concat", listToSeq(jcols))
111+
column(jc)
112+
})
113+
114+
#' @rdname functions
115+
#' @return Returns the greatest value of the list of column names, skipping null values.
116+
#' This function takes at least 2 parameters. It will return null if all parameters are null.
117+
setMethod("greatest",
118+
signature(x = "Column"),
119+
function(x, ...) {
120+
stopifnot(length(list(...)) > 0)
121+
jcols <- lapply(list(x, ...), function(x) { x@jc })
122+
jc <- callJStatic("org.apache.spark.sql.functions", "greatest", listToSeq(jcols))
123+
column(jc)
124+
})
125+
126+
#' @rdname functions
127+
#' @return Returns the least value of the list of column names, skipping null values.
128+
#' This function takes at least 2 parameters. It will return null iff all parameters are null.
129+
setMethod("least",
130+
signature(x = "Column"),
131+
function(x, ...) {
132+
stopifnot(length(list(...)) > 0)
133+
jcols <- lapply(list(x, ...), function(x) { x@jc })
134+
jc <- callJStatic("org.apache.spark.sql.functions", "least", listToSeq(jcols))
135+
column(jc)
136+
})
137+
138+
#' @rdname functions
139+
#' @aliases ceil
140+
setMethod("ceiling",
141+
signature(x = "Column"),
142+
function(x) {
143+
ceil(x)
144+
})
145+
146+
#' @rdname functions
147+
#' @aliases signum
148+
setMethod("sign", signature(x = "Column"),
149+
function(x) {
150+
signum(x)
151+
})
152+
153+
#' @rdname functions
154+
#' @aliases countDistinct
155+
setMethod("n_distinct", signature(x = "Column"),
156+
function(x, ...) {
157+
countDistinct(x, ...)
158+
})
159+
160+
#' @rdname functions
161+
#' @aliases count
162+
setMethod("n", signature(x = "Column"),
163+
function(x) {
164+
count(x)
165+
})

0 commit comments

Comments
 (0)