# mllib_regression.R: Provides methods for MLlib classification algorithms
# (except for tree-based algorithms) integration
#' S4 class that represents a LinearSVCModel
#'
#' @param jobj a Java object reference to the backing Scala LinearSVCModel
#' @export
#' @note LinearSVCModel since 2.2.0
setClass("LinearSVCModel", representation(jobj = "jobj"))
#' S4 class that represents a LogisticRegressionModel
#'
#' @param jobj a Java object reference to the backing Scala LogisticRegressionModel
#' @note NaiveBayesModel since 2.0.0
setClass("NaiveBayesModel", representation(jobj = "jobj"))
#' Linear SVM Model
#'
#' Fits a linear SVM model against a SparkDataFrame. It is a binary classifier, similar to svm in glmnet package.
#' Users can print, make predictions on the produced model and save the model to the input path.
#'
#' @param data SparkDataFrame for training.
#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
#'                operators are supported, including '~', '.', ':', '+', and '-'.
#' @param regParam The regularization parameter.
#' @param maxIter Maximum iteration number.
#' @param tol Convergence tolerance of iterations.
#' @param standardization Whether to standardize the training features before fitting the model. The coefficients
#'                        of models will be always returned on the original scale, so it will be transparent for
#'                        users. Note that with/without standardization, the models should be always converged
#'                        to the same solution when no regularization is applied.
#' @param threshold The threshold in binary classification, in range [0, 1].
#' @param weightCol The weight column name.
#' @param aggregationDepth The depth for treeAggregate (greater than or equal to 2). If the dimensions of features
#'                         or the number of partitions are large, this param could be adjusted to a larger size.
#'                         This is an expert parameter. Default value should be good for most cases.
#' @param ... additional arguments passed to the method.
#' @return \code{spark.svmLinear} returns a fitted linear SVM model.
#' @rdname spark.svmLinear
#' @aliases spark.svmLinear,SparkDataFrame,formula-method
#' @name spark.svmLinear
#' @export
#' @examples
#' \dontrun{
#' sparkR.session()
#' df <- createDataFrame(iris)
#' training <- df[df$Species %in% c("versicolor", "virginica"), ]
#' model <- spark.svmLinear(training, Species ~ ., regParam = 0.5)
#' summary <- summary(model)
#'
#' # fitted values on training data
#' fitted <- predict(model, training)
#'
#' # save fitted model to input path
#' path <- "path/to/model"
#' write.ml(model, path)
#'
#' # can also read back the saved model and predict
#' # Note that summary does not work on loaded model
#' savedModel <- read.ml(path)
#' summary(savedModel)
#' }
#' @note spark.svmLinear since 2.2.0
setMethod("spark.svmLinear", signature(data = "SparkDataFrame", formula = "formula"),
          function(data, formula, regParam = 0.0, maxIter = 100, tol = 1E-6, standardization = TRUE,
                   threshold = 0.0, weightCol = NULL, aggregationDepth = 2) {
            # The Scala wrapper takes the formula as a single string.
            formula <- paste(deparse(formula), collapse = "")

            # An empty weight column name means "no weight column".
            if (!is.null(weightCol) && weightCol == "") {
              weightCol <- NULL
            } else if (!is.null(weightCol)) {
              weightCol <- as.character(weightCol)
            }

            # Delegate fitting to the JVM-side LinearSVCWrapper, coercing every
            # argument to the exact type the Scala signature expects.
            jobj <- callJStatic("org.apache.spark.ml.r.LinearSVCWrapper", "fit",
                                data@sdf, formula, as.numeric(regParam), as.integer(maxIter),
                                as.numeric(tol), as.logical(standardization), as.numeric(threshold),
                                weightCol, as.integer(aggregationDepth))
            new("LinearSVCModel", jobj = jobj)
          })
# Predicted values based on a LinearSVCModel model

#' @param newData a SparkDataFrame for testing.
#' @return \code{predict} returns the predicted values based on a LinearSVCModel.
#' @rdname spark.svmLinear
#' @aliases predict,LinearSVCModel,SparkDataFrame-method
#' @export
#' @note predict(LinearSVCModel) since 2.2.0
setMethod("predict", signature(object = "LinearSVCModel"),
          function(object, newData) {
            # Delegates to the shared JVM-backed prediction helper.
            predict_internal(object, newData)
          })
# Get the summary of a LinearSVCModel

#' @param object a LinearSVCModel fitted by \code{spark.svmLinear}.
#' @return \code{summary} returns summary information of the fitted model, which is a list.
#'         The list includes \code{coefficients} (coefficients of the fitted model),
#'         \code{intercept} (intercept of the fitted model), \code{numClasses} (number of classes),
#'         \code{numFeatures} (number of features).
#' @rdname spark.svmLinear
#' @aliases summary,LinearSVCModel-method
#' @export
#' @note summary(LinearSVCModel) since 2.2.0
setMethod("summary", signature(object = "LinearSVCModel"),
          function(object) {
            jobj <- object@jobj
            features <- callJMethod(jobj, "features")
            labels <- callJMethod(jobj, "labels")
            coefficients <- callJMethod(jobj, "coefficients")
            # The JVM side returns a flat vector; reshape it to one column per class.
            nCol <- length(coefficients) / length(features)
            coefficients <- matrix(unlist(coefficients), ncol = nCol)
            intercept <- callJMethod(jobj, "intercept")
            numClasses <- callJMethod(jobj, "numClasses")
            numFeatures <- callJMethod(jobj, "numFeatures")
            if (nCol == 1) {
              # Binary model: a single coefficient column.
              colnames(coefficients) <- c("Estimate")
            } else {
              colnames(coefficients) <- unlist(labels)
            }
            rownames(coefficients) <- unlist(features)
            list(coefficients = coefficients, intercept = intercept,
                 numClasses = numClasses, numFeatures = numFeatures)
          })
# Save fitted LinearSVCModel to the input path

#' @param path The directory where the model is saved.
#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
#'                  which means throw exception if the output path exists.
#'
#' @rdname spark.svmLinear
#' @aliases write.ml,LinearSVCModel,character-method
#' @export
#' @note write.ml(LinearSVCModel, character) since 2.2.0
setMethod("write.ml", signature(object = "LinearSVCModel", path = "character"),
          function(object, path, overwrite = FALSE) {
            # Delegates to the shared JVM-backed persistence helper.
            write_internal(object, path, overwrite)
          })
#' Logistic Regression Model
#'
#' Fits a logistic regression model against a SparkDataFrame. It supports "binomial": Binary logistic regression