1
1
# ' \code{stat_bin} is suitable only for continuous x data. If your x data is
2
2
# ' discrete, you probably want to use \code{\link{stat_count}}.
3
3
# '
4
- # ' @param binwidth Bin width to use. Defaults to 1/\code{bins} of the range of
5
- # ' the data
6
- # ' @param bins Number of bins. Overridden by \code{binwidth} or \code{breaks}.
7
- # ' Defaults to 30
8
- # ' @param breaks Actual breaks to use. Overrides bin width, bin number and
9
- # ' origin
10
- # ' @param origin Origin of first bin
11
- # ' @param width Width of bars when used with categorical data
12
- # ' @param right If \code{TRUE}, right-closed, left-open, if \code{FALSE},
13
- # ' the default, right-open, left-closed.
14
- # ' @param drop If TRUE, remove all bins with zero counts
4
+ # ' @param binwidth The width of the bins. The default is to use \code{bins}
5
+ # ' bins that cover the range of the data. You should always override
6
+ # ' this value, exploring multiple widths to find the best to illustrate the
7
+ # ' stories in your data.
8
+ # '
9
+ # ' The bin width of a date variable is the number of days in each time; the
10
+ # ' bin width of a time variable is the number of seconds.
11
+ # ' @param bins Number of bins. Overridden by \code{binwidth}. Defaults to 30
12
+ # ' @param center The center of one of the bins. Note that if center is above or
13
+ # ' below the range of the data, things will be shifted by an appropriate
14
+ # ' number of \code{width}s. To center on integers, for example, use
15
+ # ' \code{width=1} and \code{center=0}, even if \code{0} is outside the range
16
+ # ' of the data. At most one of \code{center} and \code{boundary} may be
17
+ # ' specified.
18
+ # ' @param boundary A boundary between two bins. As with \code{center}, things
19
+ # ' are shifted when \code{boundary} is outside the range of the data. For
20
+ # ' example, to center on integers, use \code{width = 1} and \code{boundary =
21
+ # ' 0.5}, even if \code{1} is outside the range of the data. At most one of
22
+ # ' \code{center} and \code{boundary} may be specified.
23
+ # ' @param closed One of \code{"right"} or \code{"left"} indicating whether right
24
+ # ' or left edges of bins are included in the bin.
25
+ # ' @param pad If \code{TRUE}, adds empty bins at either end of x. This ensures
26
+ # ' frequency polygons touch 0. Defaults to \code{FALSE}.
15
27
# ' @section Computed variables:
16
28
# ' \describe{
17
29
# ' \item{count}{number of points in bin}
26
38
# ' @export
27
39
# ' @rdname geom_histogram
28
40
stat_bin <- function (mapping = NULL , data = NULL , geom = " bar" ,
29
- position = " stack" , width = 0.9 , drop = FALSE ,
30
- right = FALSE , binwidth = NULL , bins = NULL , origin = NULL ,
31
- breaks = NULL , na.rm = FALSE ,
32
- show.legend = NA , inherit.aes = TRUE , ... ) {
41
+ position = " stack" , ... ,
42
+ binwidth = NULL , bins = NULL , center = NULL , boundary = NULL ,
43
+ closed = c(" right" , " left" ), pad = FALSE ,
44
+ na.rm = FALSE , show.legend = NA , inherit.aes = TRUE ) {
45
+
33
46
layer(
34
47
data = data ,
35
48
mapping = mapping ,
@@ -39,13 +52,12 @@ stat_bin <- function(mapping = NULL, data = NULL, geom = "bar",
39
52
show.legend = show.legend ,
40
53
inherit.aes = inherit.aes ,
41
54
params = list (
42
- width = width ,
43
- drop = drop ,
44
- right = right ,
45
- bins = bins ,
46
55
binwidth = binwidth ,
47
- origin = origin ,
48
- breaks = breaks ,
56
+ bins = bins ,
57
+ center = center ,
58
+ boundary = boundary ,
59
+ closed = closed ,
60
+ pad = pad ,
49
61
na.rm = na.rm ,
50
62
...
51
63
)
@@ -66,92 +78,192 @@ StatBin <- ggproto("StatBin", Stat,
66
78
call. = FALSE )
67
79
}
68
80
81
+ if (! is.null(params $ drop )) {
82
+ warning(" `drop` is deprecated. Please use `pad` instead." , call. = FALSE )
83
+ params $ drop <- NULL
84
+ }
85
+ if (! is.null(params $ origin )) {
86
+ warning(" `origin` is deprecated. Please use `boundary` instead." , call. = FALSE )
87
+ params $ boundary <- params $ origin
88
+ params $ origin <- NULL
89
+ }
90
+ if (! is.null(params $ right )) {
91
+ warning(" `right` is deprecated. Please use `closed` instead." , call. = FALSE )
92
+ params $ closed <- if (params $ right ) " right" else " left"
93
+ params $ right <- NULL
94
+ }
95
+ if (! is.null(params $ breaks )) {
96
+ stop(" `breaks` is deprecated." , call. = FALSE )
97
+ }
98
+ if (! is.null(params $ width )) {
99
+ stop(" `width` is deprecated. Do you want `geom_bar()`?" , call. = FALSE )
100
+ }
101
+ if (! is.null(params $ boundary ) && ! is.null(params $ center )) {
102
+ stop(" Only one of `boundary` and `center` may be specified." , call. = FALSE )
103
+ }
104
+
105
+
69
106
if (is.null(params $ breaks ) && is.null(params $ binwidth ) && is.null(params $ bins )) {
70
107
message_wrap(" `stat_bin()` using `bins = 30`. Pick better value with `binwidth`." )
108
+ params $ bins <- 30
71
109
}
72
110
73
111
params
74
112
},
75
113
76
114
compute_group = function (data , scales , binwidth = NULL , bins = NULL ,
77
- origin = NULL , breaks = NULL , width = 0.9 , drop = FALSE ,
78
- right = FALSE ) {
79
- range <- scales $ x $ dimension()
115
+ center = NULL , boundary = NULL ,
116
+ closed = c(" right" , " left" ), pad = FALSE ,
117
+ # The following arguments are not used, but must
118
+ # be listed so parameters are computed correctly
119
+ breaks = NULL , origin = NULL , right = NULL ,
120
+ drop = NULL , width = NULL ) {
80
121
81
- bin(data $ x , data $ weight , binwidth = binwidth , bins = bins ,
82
- origin = origin , breaks = breaks , range = range , width = width ,
83
- drop = drop , right = right )
122
+ params <- bin_params(
123
+ scales $ x $ dimension(),
124
+ width = binwidth ,
125
+ bins = bins ,
126
+ center = center ,
127
+ boundary = boundary ,
128
+ closed = closed
129
+ )
130
+
131
+ bin_vector(data $ x , weight = data $ weight , width = params $ width ,
132
+ origin = params $ origin , closed = params $ closed , pad = pad )
84
133
},
85
134
86
135
default_aes = aes(y = ..count.. ),
87
136
required_aes = c(" x" )
88
137
)
89
138
90
- bin <- function (x , weight = NULL , binwidth = NULL , bins = NULL , origin = NULL , breaks = NULL , range = NULL , width = 0.9 , drop = FALSE , right = FALSE ) {
91
139
92
- if (length(stats :: na.omit(x )) == 0 ) return (data.frame ())
93
- if (is.null(weight )) weight <- rep(1 , length(x ))
94
- weight [is.na(weight )] <- 0
140
+ # Compute parameters -----------------------------------------------------------
95
141
96
- if (is.null(range )) range <- range(x , na.rm = TRUE , finite = TRUE )
142
+ bin_params <- function (x_range , width = NULL , bins = 30 , center = NULL ,
143
+ boundary = NULL , closed = c(" right" , " left" )) {
144
+ closed <- match.arg(closed )
97
145
98
- if (is.null(bins )) {
99
- bins <- 30
100
- } else {
101
- stopifnot(is.numeric(bins ), length(bins ) == 1 , bins > 1 )
146
+ if (length(x_range ) == 0 ) {
147
+ return (list (width = width , origin = NULL , closed = closed ))
102
148
}
103
149
104
- if (is.null(binwidth )) binwidth <- diff(range ) / (bins - 1 )
105
-
106
- if (is.integer(x )) {
107
- bins <- x
108
- x <- sort(unique(bins ))
109
- width <- width
110
- } else if (diff(range ) == 0 ) {
111
- width <- width
112
- bins <- x
113
- } else {# if (is.numeric(x))
114
- if (is.null(breaks )) {
115
- if (is.null(origin )) {
116
- breaks <- fullseq(range , binwidth , pad = TRUE )
117
- } else {
118
- breaks <- seq(origin , max(range ) + binwidth , binwidth )
119
- }
120
- }
150
+ stopifnot(length(x_range ) == 2 )
151
+ if (! is.null(boundary ) && ! is.null(center )) {
152
+ stop(" Only one of 'boundary' and 'center' may be specified." )
153
+ }
154
+
155
+ if (is.null(width )) {
156
+ width <- (x_range [2 ] - x_range [1 ]) / (bins - 1 )
157
+ }
158
+
159
+ if (is.null(boundary )) {
160
+ if (is.null(center )) {
161
+ # If neither edge nor center given, compute both using tile layer's
162
+ # algorithm. This puts min and max of data in outer half of their bins.
163
+ boundary <- width / 2
121
164
122
- # Adapt break fuzziness from base::hist - this protects from floating
123
- # point rounding errors
124
- diddle <- 1e-07 * stats :: median(diff(breaks ))
125
- if (right ) {
126
- fuzz <- c(- diddle , rep.int(diddle , length(breaks ) - 1 ))
127
165
} else {
128
- fuzz <- c(rep.int(- diddle , length(breaks ) - 1 ), diddle )
166
+ # If center given but not boundary, compute boundary.
167
+ boundary <- center - width / 2
129
168
}
130
- fuzzybreaks <- sort(breaks ) + fuzz
169
+ }
170
+
171
+ # Inputs could be Dates or POSIXct, so make sure these are all numeric
172
+ x_range <- as.numeric(x_range )
173
+ width <- as.numeric(width )
174
+ boundary <- as.numeric(boundary )
175
+
176
+ origin <- find_origin(x_range , width , boundary )
131
177
132
- bins <- cut(x , fuzzybreaks , include.lowest = TRUE , right = right )
133
- left <- breaks [- length(breaks )]
134
- right <- breaks [- 1 ]
135
- x <- (left + right )/ 2
136
- width <- diff(breaks )
178
+ list (width = width , origin = origin , closed = closed )
179
+ }
180
+
181
+ # Find the left side of left-most bin
182
+ find_origin <- function (x_range , width , boundary ) {
183
+ shift <- floor((x_range [1 ] - boundary ) / width )
184
+ boundary + shift * width
185
+ }
186
+
187
+ bin_vector <- function (x , weight = NULL , ... , width = 1 ,
188
+ origin = 0 , closed = c(" right" , " left" ),
189
+ pad = FALSE ) {
190
+ closed <- match.arg(closed )
191
+
192
+ if (all(is.na(x ))) {
193
+ return (bin_out(length(x ), NA , NA , xmin = NA , xmax = NA ))
194
+ }
195
+
196
+ stopifnot(is.numeric(width ) && length(width ) == 1 )
197
+ stopifnot(is.numeric(origin ) && length(origin ) == 1 )
198
+
199
+ if (is.null(weight )) {
200
+ weight <- rep(1 , length(x ))
201
+ } else {
202
+ weight [is.na(weight )] <- 0
137
203
}
138
204
139
- results <- data.frame (
140
- count = as.numeric(tapply(weight , bins , sum , na.rm = TRUE )),
205
+ min_x <- origin
206
+ # Small correction factor so that we don't get an extra bin when, for
207
+ # example, origin=0, max(x)=20, width=10.
208
+ max_x <- max(x , na.rm = TRUE ) + (1 - 1e-08 ) * width
209
+ breaks <- seq(min_x , max_x , width )
210
+ fuzzybreaks <- adjust_breaks2(breaks , closed = closed )
211
+
212
+ bins <- cut(x , fuzzybreaks , include.lowest = TRUE , right = (closed == " right" ))
213
+
214
+ left <- breaks [- length(breaks )]
215
+ right <- breaks [- 1 ]
216
+ x <- (left + right ) / 2
217
+ bin_widths <- diff(breaks )
218
+
219
+ count <- as.numeric(tapply(weight , bins , sum , na.rm = TRUE ))
220
+ count [is.na(count )] <- 0
221
+
222
+ if (pad ) {
223
+ count <- c(0 , count , 0 )
224
+ bin_widths <- c(width , bin_widths , width )
225
+ x <- c(x [1 ] - width , x , x [length(x )] + width )
226
+ }
227
+
228
+ # Add row for missings
229
+ if (any(is.na(bins ))) {
230
+ count <- c(count , sum(is.na(bins )))
231
+ left <- c(left , NA )
232
+ right <- c(right , NA )
233
+ x <- c(x , NA )
234
+ bin_widths <- c(bin_widths , NA )
235
+ }
236
+
237
+ bin_out(count , x , bin_widths )
238
+ }
239
+
240
+ bin_out <- function (count = integer(0 ), x = numeric (0 ), width = numeric (0 ),
241
+ xmin = x - width / 2 , xmax = x + width / 2 ) {
242
+ density <- count / width / sum(abs(count ))
243
+
244
+ data.frame (
245
+ count = count ,
141
246
x = x ,
142
- width = width
247
+ xmin = xmin ,
248
+ xmax = xmax ,
249
+ width = width ,
250
+ density = density ,
251
+ ncount = count / max(abs(count )),
252
+ ndensity = count / max(abs(density )),
253
+ stringsAsFactors = FALSE
143
254
)
255
+ }
144
256
145
- if (sum(results $ count , na.rm = TRUE ) == 0 ) {
146
- return (results )
147
- }
257
+ # Adapt break fuzziness from base::hist - this protects from floating
258
+ # point rounding errors
259
+ adjust_breaks2 <- function (breaks , closed = " left" ) {
260
+ closed <- match.arg(closed , c(" right" , " left" ))
148
261
149
- results $ count [is.na(results $ count )] <- 0
150
- results $ density <- results $ count / results $ width / sum(abs(results $ count ), na.rm = TRUE )
151
- results $ ncount <- results $ count / max(abs(results $ count ), na.rm = TRUE )
152
- results $ ndensity <- results $ density / max(abs(results $ density ), na.rm = TRUE )
153
- if (drop ) {
154
- results <- results [results $ count > 0 , , drop = FALSE ]
262
+ diddle <- 1e-08 * median(diff(breaks ))
263
+ if (closed == " right" ) {
264
+ fuzz <- c(- diddle , rep.int(diddle , length(breaks ) - 1 ))
265
+ } else {
266
+ fuzz <- c(rep.int(- diddle , length(breaks ) - 1 ), diddle )
155
267
}
156
- results
268
+ sort( breaks ) + fuzz
157
269
}
0 commit comments