@@ -54,9 +54,9 @@ infer_type <- function(x) {
54
54
# StructType
55
55
types <- lapply(x , infer_type )
56
56
fields <- lapply(1 : length(x ), function (i ) {
57
- list ( name = names [[i ]], type = types [[i ]], nullable = TRUE )
57
+ field( names [[i ]], types [[i ]], TRUE )
58
58
})
59
- list ( type = " struct " , fields = fields )
59
+ do.call( buildSchema , fields )
60
60
}
61
61
} else if (length(x ) > 1 ) {
62
62
list (type = " array" , elementType = type , containsNull = TRUE )
@@ -67,19 +67,19 @@ infer_type <- function(x) {
67
67
68
68
# ' dump the schema into JSON string
69
69
tojson <- function (x ) {
70
- if (is.list(x )) {
70
+ if (inherits(x , " struct" )) {
71
+ # schema object
72
+ l <- paste(lapply(x , tojson ), collapse = " , " )
73
+ paste(' {\" type\" :\" struct\" , \" fields\" :' ,' [' , l , ' ]}' , sep = ' ' )
74
+ } else if (inherits(x , " field" )) {
75
+ # field object
71
76
names <- names(x )
72
- if (! is.null(names )) {
73
- items <- lapply(names , function (n ) {
74
- safe_n <- gsub(' "' , ' \\ "' , n )
75
- paste(tojson(safe_n ), ' :' , tojson(x [[n ]]), sep = ' ' )
76
- })
77
- d <- paste(items , collapse = ' , ' )
78
- paste(' {' , d , ' }' , sep = ' ' )
79
- } else {
80
- l <- paste(lapply(x , tojson ), collapse = ' , ' )
81
- paste(' [' , l , ' ]' , sep = ' ' )
82
- }
77
+ items <- lapply(names , function (n ) {
78
+ safe_n <- gsub(' "' , ' \\ "' , n )
79
+ paste(tojson(safe_n ), ' :' , tojson(x [[n ]]), sep = ' ' )
80
+ })
81
+ d <- paste(items , collapse = " , " )
82
+ paste(' {' , d , ' }' , sep = ' ' )
83
83
} else if (is.character(x )) {
84
84
paste(' "' , x , ' "' , sep = ' ' )
85
85
} else if (is.logical(x )) {
@@ -134,7 +134,7 @@ createDataFrame <- function(sqlCtx, data, schema = NULL, samplingRatio = 1.0) {
134
134
stop(paste(" unexpected type:" , class(data )))
135
135
}
136
136
137
- if (is.null(schema ) || is.null(names(schema ))) {
137
+ if (is.null(schema ) || ( ! inherits( schema , " struct " ) && is.null(names(schema ) ))) {
138
138
row <- first(rdd )
139
139
names <- if (is.null(schema )) {
140
140
names(row )
@@ -143,7 +143,7 @@ createDataFrame <- function(sqlCtx, data, schema = NULL, samplingRatio = 1.0) {
143
143
}
144
144
if (is.null(names )) {
145
145
names <- lapply(1 : length(row ), function (x ) {
146
- paste(" _" , as.character(x ), sep = " " )
146
+ paste(" _" , as.character(x ), sep = " " )
147
147
})
148
148
}
149
149
@@ -159,14 +159,12 @@ createDataFrame <- function(sqlCtx, data, schema = NULL, samplingRatio = 1.0) {
159
159
160
160
types <- lapply(row , infer_type )
161
161
fields <- lapply(1 : length(row ), function (i ) {
162
- list ( name = names [[i ]], type = types [[i ]], nullable = TRUE )
162
+ field( names [[i ]], types [[i ]], TRUE )
163
163
})
164
- schema <- list ( type = " struct " , fields = fields )
164
+ schema <- do.call( buildSchema , fields )
165
165
}
166
166
167
- stopifnot(class(schema ) == " list" )
168
- stopifnot(schema $ type == " struct" )
169
- stopifnot(class(schema $ fields ) == " list" )
167
+ stopifnot(class(schema ) == " struct" )
170
168
schemaString <- tojson(schema )
171
169
172
170
jrdd <- getJRDD(lapply(rdd , function (x ) x ), " row" )
@@ -518,3 +516,37 @@ createExternalTable <- function(sqlCtx, tableName, path = NULL, source = NULL, .
518
516
sdf <- callJMethod(sqlCtx , " createExternalTable" , tableName , source , options )
519
517
dataFrame(sdf )
520
518
}
519
+
520
+ buildSchema <- function (field , ... ) {
521
+ fields <- list (field , ... )
522
+ if (! all(sapply(fields , inherits , " field" ))) {
523
+ stop(" All arguments must be Field objects." )
524
+ }
525
+
526
+ structure(fields , class = " struct" )
527
+ }
528
+
529
+ print.struct <- function (x , ... ) {
530
+ cat(sapply(x , function (field ) { paste(" |-" , " name = \" " , field $ name ,
531
+ " \" , type = \" " , field $ type ,
532
+ " \" , nullable = " , field $ nullable , " \n " ,
533
+ sep = " " ) })
534
+ , sep = " " )
535
+ }
536
+
537
+ field <- function (name , type , nullable = TRUE ) {
538
+ if (class(name ) != " character" ) {
539
+ stop(" Field name must be a string." )
540
+ }
541
+ if (class(type ) != " character" ) {
542
+ stop(" Field type must be a string." )
543
+ }
544
+ if (class(nullable ) != " logical" ) {
545
+ stop(" nullable must be either TRUE or FALSE" )
546
+ }
547
+ structure(list (" name" = name , " type" = type , " nullable" = nullable ), class = " field" )
548
+ }
549
+
550
+ print.field <- function (x , ... ) {
551
+ cat(" name = \" " , x $ name , " \" , type = \" " , x $ type , " \" , nullable = " , x $ nullable , sep = " " )
552
+ }
0 commit comments