Skip to content

Commit 0e2a94f

Browse files
cafreemanDavies Liu
cafreeman
authored and
Davies Liu
committed
Define functions for schema and fields
Instead of using a list[list[list[]]], use specific constructors for schema and field objects.
1 parent 2fe0a1a commit 0e2a94f

File tree

3 files changed

+76
-31
lines changed

3 files changed

+76
-31
lines changed

R/pkg/NAMESPACE

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ export("cacheTable",
171171
"jsonRDD",
172172
"loadDF",
173173
"parquetFile",
174+
"buildSchema",
175+
"field",
174176
"sql",
175177
"table",
176178
"tableNames",
@@ -179,4 +181,6 @@ export("cacheTable",
179181
"uncacheTable")
180182

181183
export("print.structType",
182-
"print.structField")
184+
"print.structField",
185+
"print.struct",
186+
"print.field")

R/pkg/R/SQLContext.R

Lines changed: 53 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,9 @@ infer_type <- function(x) {
5454
# StructType
5555
types <- lapply(x, infer_type)
5656
fields <- lapply(1:length(x), function(i) {
57-
list(name = names[[i]], type = types[[i]], nullable = TRUE)
57+
field(names[[i]], types[[i]], TRUE)
5858
})
59-
list(type = "struct", fields = fields)
59+
do.call(buildSchema, fields)
6060
}
6161
} else if (length(x) > 1) {
6262
list(type = "array", elementType = type, containsNull = TRUE)
@@ -67,19 +67,19 @@ infer_type <- function(x) {
6767

6868
#' dump the schema into JSON string
6969
tojson <- function(x) {
70-
if (is.list(x)) {
70+
if (inherits(x, "struct")) {
71+
# schema object
72+
l <- paste(lapply(x, tojson), collapse = ", ")
73+
paste('{\"type\":\"struct\", \"fields\":','[', l, ']}', sep = '')
74+
} else if (inherits(x, "field")) {
75+
# field object
7176
names <- names(x)
72-
if (!is.null(names)) {
73-
items <- lapply(names, function(n) {
74-
safe_n <- gsub('"', '\\"', n)
75-
paste(tojson(safe_n), ':', tojson(x[[n]]), sep = '')
76-
})
77-
d <- paste(items, collapse = ', ')
78-
paste('{', d, '}', sep = '')
79-
} else {
80-
l <- paste(lapply(x, tojson), collapse = ', ')
81-
paste('[', l, ']', sep = '')
82-
}
77+
items <- lapply(names, function(n) {
78+
safe_n <- gsub('"', '\\"', n)
79+
paste(tojson(safe_n), ':', tojson(x[[n]]), sep = '')
80+
})
81+
d <- paste(items, collapse = ", ")
82+
paste('{', d, '}', sep = '')
8383
} else if (is.character(x)) {
8484
paste('"', x, '"', sep = '')
8585
} else if (is.logical(x)) {
@@ -134,7 +134,7 @@ createDataFrame <- function(sqlCtx, data, schema = NULL, samplingRatio = 1.0) {
134134
stop(paste("unexpected type:", class(data)))
135135
}
136136

137-
if (is.null(schema) || is.null(names(schema))) {
137+
if (is.null(schema) || (!inherits(schema, "struct") && is.null(names(schema)))) {
138138
row <- first(rdd)
139139
names <- if (is.null(schema)) {
140140
names(row)
@@ -143,7 +143,7 @@ createDataFrame <- function(sqlCtx, data, schema = NULL, samplingRatio = 1.0) {
143143
}
144144
if (is.null(names)) {
145145
names <- lapply(1:length(row), function(x) {
146-
paste("_", as.character(x), sep = "")
146+
paste("_", as.character(x), sep = "")
147147
})
148148
}
149149

@@ -159,14 +159,12 @@ createDataFrame <- function(sqlCtx, data, schema = NULL, samplingRatio = 1.0) {
159159

160160
types <- lapply(row, infer_type)
161161
fields <- lapply(1:length(row), function(i) {
162-
list(name = names[[i]], type = types[[i]], nullable = TRUE)
162+
field(names[[i]], types[[i]], TRUE)
163163
})
164-
schema <- list(type = "struct", fields = fields)
164+
schema <- do.call(buildSchema, fields)
165165
}
166166

167-
stopifnot(class(schema) == "list")
168-
stopifnot(schema$type == "struct")
169-
stopifnot(class(schema$fields) == "list")
167+
stopifnot(class(schema) == "struct")
170168
schemaString <- tojson(schema)
171169

172170
jrdd <- getJRDD(lapply(rdd, function(x) x), "row")
@@ -518,3 +516,37 @@ createExternalTable <- function(sqlCtx, tableName, path = NULL, source = NULL, .
518516
sdf <- callJMethod(sqlCtx, "createExternalTable", tableName, source, options)
519517
dataFrame(sdf)
520518
}
519+
520+
#' Build a schema from one or more field objects
#'
#' @param field The first field of the schema; must inherit from "field".
#' @param ... Any further field objects, kept in the order given.
#' @return A list holding the supplied fields, tagged with the S3 class
#'   "struct".
buildSchema <- function(field, ...) {
  schema_fields <- list(field, ...)

  # Reject the whole call as soon as one argument is not a field object.
  for (candidate in schema_fields) {
    if (!inherits(candidate, "field")) {
      stop("All arguments must be Field objects.")
    }
  }

  class(schema_fields) <- "struct"
  schema_fields
}
}
528+
529+
print.struct <- function(x, ...) {
530+
cat(sapply(x, function(field) { paste("|-", "name = \"", field$name,
531+
"\", type = \"", field$type,
532+
"\", nullable = ", field$nullable, "\n",
533+
sep = "") })
534+
, sep = "")
535+
}
536+
537+
#' Create a field object describing one column of a schema
#'
#' @param name The column name, a single character string.
#' @param type The column type, a single character string.
#' @param nullable A single TRUE or FALSE value; defaults to TRUE.
#' @return A list with elements `name`, `type` and `nullable`, tagged with
#'   the S3 class "field".
field <- function(name, type, nullable = TRUE) {
  # is.character()/is.logical() always return a single value, whereas
  # class(x) != "character" returns one value per class and breaks `if`
  # for objects carrying more than one class.
  if (!is.character(name) || length(name) != 1L) {
    stop("Field name must be a string.")
  }
  if (!is.character(type) || length(type) != 1L) {
    stop("Field type must be a string.")
  }
  if (!is.logical(nullable) || length(nullable) != 1L || is.na(nullable)) {
    stop("nullable must be either TRUE or FALSE")
  }
  structure(list(name = name, type = type, nullable = nullable), class = "field")
}
}
549+
550+
#' Print a single field
#'
#' Writes `name = "<name>", type = "<type>", nullable = <nullable>` followed
#' by a newline.
#'
#' @param x A "field" object.
#' @param ... Unused; accepted for compatibility with the print generic.
#' @return `x`, invisibly, as is conventional for print methods.
print.field <- function(x, ...) {
  # End with a newline so later console output starts on its own line.
  cat("name = \"", x$name, "\", type = \"", x$type,
      "\", nullable = ", x$nullable, "\n", sep = "")
  invisible(x)
}
}

R/pkg/inst/tests/test_sparkSQL.R

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,16 +44,27 @@ test_that("infer types", {
4444
expect_equal(infer_type(list(1L, 2L)),
4545
list(type = 'array', elementType = "integer", containsNull = TRUE))
4646
expect_equal(infer_type(list(a = 1L, b = "2")),
47-
list(type = "struct",
48-
fields = list(list(name = "a", type = "integer", nullable = TRUE),
49-
list(name = "b", type = "string", nullable = TRUE))))
47+
buildSchema(field(name = "a", type = "integer", nullable = TRUE),
48+
field(name = "b", type = "string", nullable = TRUE)))
5049
e <- new.env()
5150
assign("a", 1L, envir = e)
5251
expect_equal(infer_type(e),
5352
list(type = "map", keyType = "string", valueType = "integer",
5453
valueContainsNull = TRUE))
5554
})
5655

56+
test_that("buildSchema and field", {
  # field() tags its result with class "field"; nullable is TRUE when omitted.
  testField <- field("a", "string")
  expect_true(inherits(testField, "field"))
  # expect_equal reports the actual value on failure, unlike expect_true(a == b).
  expect_equal(testField$name, "a")
  expect_true(testField$nullable)

  # buildSchema() keeps the fields in order and tags the list as "struct".
  testSchema <- buildSchema(testField, field("b", "integer"))
  expect_true(inherits(testSchema, "struct"))
  expect_true(inherits(testSchema[[2]], "field"))
  expect_equal(testSchema[[1]]$type, "string")
})
})
67+
5768
test_that("create DataFrame from RDD", {
5869
rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) })
5970
df <- createDataFrame(sqlCtx, rdd, list("a", "b"))
@@ -66,9 +77,8 @@ test_that("create DataFrame from RDD", {
6677
expect_true(inherits(df, "DataFrame"))
6778
expect_equal(columns(df), c("_1", "_2"))
6879

69-
fields <- list(list(name = "a", type = "integer", nullable = TRUE),
70-
list(name = "b", type = "string", nullable = TRUE))
71-
schema <- list(type = "struct", fields = fields)
80+
schema <- buildSchema(field(name = "a", type = "integer", nullable = TRUE),
81+
field(name = "b", type = "string", nullable = TRUE))
7282
df <- createDataFrame(sqlCtx, rdd, schema)
7383
expect_true(inherits(df, "DataFrame"))
7484
expect_equal(columns(df), c("a", "b"))
@@ -94,9 +104,8 @@ test_that("toDF", {
94104
expect_true(inherits(df, "DataFrame"))
95105
expect_equal(columns(df), c("_1", "_2"))
96106

97-
fields <- list(list(name = "a", type = "integer", nullable = TRUE),
98-
list(name = "b", type = "string", nullable = TRUE))
99-
schema <- list(type = "struct", fields = fields)
107+
schema <- buildSchema(field(name = "a", type = "integer", nullable = TRUE),
108+
field(name = "b", type = "string", nullable = TRUE))
100109
df <- toDF(rdd, schema)
101110
expect_true(inherits(df, "DataFrame"))
102111
expect_equal(columns(df), c("a", "b"))

0 commit comments

Comments
 (0)