Skip to content

Commit 7b2bfa1

Browse files
authored
Merge pull request #207 from DistanceDevelopment/Issue_194
Closes Issue #194
2 parents 2e40366 + 51403f7 commit 7b2bfa1

File tree

10 files changed

+108
-20
lines changed

10 files changed

+108
-20
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ Description: A simple way of fitting detection functions to distance sampling
1616
Horvitz-Thompson-like estimator) if survey area information is provided. See
1717
Miller et al. (2019) <doi:10.18637/jss.v089.i01> for more information on
1818
methods and <https://distancesampling.org/resources/vignettes.html> for example analyses.
19-
Version: 2.0.0.9011
19+
Version: 2.0.0.9012
2020
URL: https://github.com/DistanceDevelopment/Distance/
2121
BugReports: https://github.com/DistanceDevelopment/Distance/issues
2222
Language: en-GB

NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,10 @@ importFrom(dplyr,inner_join)
4949
importFrom(dplyr,left_join)
5050
importFrom(dplyr,mutate)
5151
importFrom(dplyr,mutate_if)
52+
importFrom(dplyr,pull)
5253
importFrom(dplyr,reframe)
5354
importFrom(dplyr,select)
55+
importFrom(dplyr,summarize)
5456
importFrom(dplyr,summarize_all)
5557
importFrom(dplyr,ungroup)
5658
importFrom(dplyr,vars)
@@ -61,6 +63,7 @@ importFrom(mrds,ddf)
6163
importFrom(mrds,dht)
6264
importFrom(rlang,.data)
6365
importFrom(stats,AIC)
66+
importFrom(stats,aggregate)
6467
importFrom(stats,as.formula)
6568
importFrom(stats,logLik)
6669
importFrom(stats,median)

NEWS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
* Fixes issue with print dht2 when multipliers are a data.frame (Issue #179)
44
* Fixes bug when including a uniform with no adjustment terms in the summarize_ds_models function (Issue #180)
5+
* dht2 can now deal with strata where there are no observations (Issue #194)
56
* Users can alternatively pass a list of models to summarize_ds_models rather than passing them individually. (Issue #149)
67
* Truncation distances greater than the largest cutpoint value for binned data are no longer permitted as these cause fitting issues. (Issue #175)
78
* print.dht_result now displays estimates for groups as well as individuals by default when group size is present. (Issue #178)

R/ER_var_f.R

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,14 @@ ER_var_f <- function(erdat, innes, binomial_var=FALSE){
2525

2626
# sort the data if we use O2/O3 estimators
2727
if(any(erdat$er_est %in% c("O2", "O3"))){
28-
warning("Using O2 or O3 encounter rate variance estimator, assuming that sorting on Sample.Label is meaningful")
28+
warning("Using O2 or O3 encounter rate variance estimator, assuming that sorting on Sample.Label is meaningful", immediate. = TRUE, call. = FALSE)
2929
if(!is.numeric(erdat$Sample.Label)){
30-
warning("Additionally, Sample.Label is not numeric, this may cause additional issues")
30+
warning("Additionally, Sample.Label is not numeric, this may cause additional issues", immediate. = TRUE, call. = FALSE)
3131
}
32+
# Add in a column indexing the original order
33+
erdat$.orig.order <- 1:nrow(erdat)
3234
erdat <- erdat %>%
33-
mutate(.originalorder = 1:nrow(erdat)) %>%
35+
# Sort by sample label
3436
arrange(.data$Sample.Label)
3537
}
3638

R/dht2.R

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -649,7 +649,9 @@ dht2 <- function(ddf, observations=NULL, transects=NULL, geo_strat=NULL,
649649
var(.data$size, na.rm=TRUE)/
650650
sum(!is.na(.data$size)),
651651
0),
652-
group_mean = mean(.data$size, na.rm=TRUE)) %>%
652+
group_mean = if_else(.data$n_observations>0,
653+
mean(.data$size, na.rm=TRUE),
654+
1)) %>%
653655
# report n as n_observations
654656
mutate(n = .data$n_observations)
655657
# if we didn't have any areas, then set to 1 and estimate density
@@ -745,11 +747,13 @@ if(mult){
745747
mutate(ER = .data$n/.data$Effort)
746748
}
747749
res <- res %>%
748-
mutate(ER_CV = sqrt(.data$ER_var)/.data$ER,
750+
mutate(ER_CV = if_else(.data$ER == 0, 0,
751+
sqrt(.data$ER_var)/.data$ER),
749752
ER_df = compute_df(.data$k, type=.data$er_est)) %>%
750753
# calculate stratum abundance estimate
751754
mutate(Abundance = (.data$Area/.data$Covered_area) * .data$Nc) %>%
752-
mutate(df_CV = sqrt(.data$df_var)/.data$Abundance) %>%
755+
mutate(df_CV = if_else(.data$Abundance == 0, 0,
756+
sqrt(.data$df_var)/.data$Abundance)) %>%
753757
mutate(group_CV = if_else(.data$group_var==0, 0,
754758
sqrt(.data$group_var)/.data$group_mean))
755759

@@ -1093,7 +1097,7 @@ if(mult){
10931097

10941098
# warn if we only had one transect in one or more strata
10951099
if(any(res$k == 1)){
1096-
warning("One or more strata have only one transect, cannot calculate empirical encounter rate variance")
1100+
warning("One or more strata have only one transect, cannot calculate empirical encounter rate variance", call. = FALSE)
10971101
}
10981102

10991103
# fix area == covered area for compatibility with mrds::dht

R/varNhat.R

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Calculate the variance contribution of the detection function
22
# to abundance estimates
3-
#' @importFrom dplyr reframe
3+
#' @importFrom dplyr reframe group_by across all_of summarize pull
4+
#' @importFrom stats aggregate
45
varNhat <- function(data, model){
56

67
# format the data
@@ -22,14 +23,23 @@ varNhat <- function(data, model){
2223
reframe(Covered_area = sum(.data$Covered_area),
2324
Area = .data$Area) %>%
2425
distinct()
26+
27+
# Add column giving number of obs per stratum
28+
n_obs <- aggregate(!is.na(data$object),
29+
by = data[strat_vars],
30+
FUN = sum)
31+
names(n_obs)[length(n_obs)] <- "n_obs"
32+
33+
# Merge into grp_dat
34+
grp_dat <- left_join(grp_dat, n_obs, by = strat_vars)
2535

2636
# join the per-stratum data onto the frame
2737
data$Covered_area <- NULL
2838
data$Area <- NULL
2939
data <- left_join(data, grp_dat, by=strat_vars)
3040

3141
# function to calculate Nhat
32-
dhtd <- function(par, data, model){
42+
dhtd <- function(par, data, model, grp_dat){
3343
# set par
3444
model$par <- par
3545

@@ -42,6 +52,10 @@ varNhat <- function(data, model){
4252
reframe(N = (.data$Area/.data$Covered_area) *
4353
sum(.data$Nc, na.rm=TRUE)) %>%
4454
distinct()
55+
56+
# Include an NA for strata with no obs and then convert to 0.
57+
res <- left_join(grp_dat, res, by = colnames(grp_dat)[1])
58+
res$N[is.na(res$N)] <- 0
4559

4660
res$N
4761
}
@@ -54,7 +68,7 @@ varNhat <- function(data, model){
5468

5569
# calculate variance
5670
dm <- DeltaMethod(model$par, dhtd, vcov, sqrt(.Machine$double.eps),
57-
model=model, data=data)
71+
model=model, data=data, grp_dat=grp_dat)
5872
attr(dm, "vardat_str") <- vardat_str
5973

6074
# fiddle with variance data.frame

R/variance_contributions.R

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@ variance_contributions <- function(res){
1818
# get the total
1919
CV_cont$Total <- sqrt(rowSums(CV_cont^2))
2020

21-
# make that into percentages
22-
CV_cont <- (CV_cont^2/CV_cont[["Total"]]^2)*100
21+
# make that into percentages (for non zero rows)
22+
index <- which(CV_cont[["Total"]]!=0)
23+
CV_cont[index,] <- (CV_cont[index,]^2/CV_cont[index,"Total"]^2)*100
2324
CV_cont[["Total"]] <- NULL
2425

2526
# zero ER contributions if only one sample

tests/testthat/test_dht2.R

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,3 +260,63 @@ test_that("can accept data.frame and scalar sampling fractions", {
260260
minke_dht2
261261

262262
})
263+
264+
265+
test_that("Deal with strata with no detections", {
266+
267+
# Add a third Region with two samples but no observations.
268+
# Adding two samples instead of just one to prevent a warning unstable ER var
269+
new.row1 <- minke[1, ] %>%
270+
mutate(
271+
Region.Label = "East",
272+
Area = 10000,
273+
Sample.Label = max(minke$Sample.Label) + 1,
274+
Effort = 10,
275+
distance = NA,
276+
object = NA
277+
)
278+
279+
new.row2 <- minke[1, ] %>%
280+
mutate(
281+
Region.Label = "East",
282+
Area = 10000,
283+
Sample.Label = max(minke$Sample.Label) + 2,
284+
Effort = 10,
285+
distance = NA,
286+
object = NA
287+
)
288+
289+
minke <- rbind(minke, new.row1, new.row2)
290+
# Fit a detection function
291+
detfn <- ds(data=minke, truncation=1.5, key="hr", adjustment=NULL)
292+
293+
result <- dht2(ddf = detfn,
294+
flatfile = minke,
295+
strat_formula = ~ Region.Label,
296+
stratification = "geographical")
297+
298+
expect_true(is(result, "dht_result"))
299+
300+
# Check values in output
301+
new.row3 <- minke[1, ] %>%
302+
mutate(
303+
Region.Label = "East",
304+
Area = 10000,
305+
Sample.Label = max(minke$Sample.Label) + 3,
306+
Effort = 10,
307+
distance = NA,
308+
object = NA
309+
)
310+
311+
minke2 <- rbind(minke, new.row1, new.row2, new.row3)
312+
detfn <- ds(data=minke2, truncation=1.5, key="hr", adjustment=NULL, optimizer = "R")
313+
dht2.results <- dht2(ddf = detfn,
314+
flatfile = minke2,
315+
strat_formula = ~ Region.Label,
316+
stratification = "geographical")
317+
318+
expect_equal(dht2.results$Abundance[1], 0)
319+
expect_equal(dht2.results$Abundance_se[4], 4834.4118, , tolerance = 0.0001)
320+
expect_equal(dht2.results$df[4],15.25496, tolerance = 0.00001)
321+
322+
})

tests/testthat/test_systematic_var2.R

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,9 @@ cu <- 1/(cu[3]/(cu[1]*cu[2]))
5656

5757
test_that("no strat",{
5858

59-
df <- ds(Systematic_variance_2, convert_units=cu)
60-
59+
# No adjustments to avoid model selection which generates a monotonicity warning. (The HN model with no adjustments is selected based on minimum AIC any way.)
60+
df <- ds(Systematic_variance_2, nadj = 0,
61+
convert_units=cu, truncation = 10)
6162

6263
# fiddle with region labels
6364
obs.table$Region.Label <- NULL

tests/testthat/test_variance.R

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@ test_that("variance 2",{
1010
convert.units <- Systematic_variance_2_units
1111
cu <- convert.units[, 3]
1212
cu <- 1/(cu[3]/(cu[1]*cu[2]))
13-
# first fit a model
14-
sysvar_df <- ds(Systematic_variance_2, adjustment="cos", convert_units=cu)
13+
# first fit a model - to avoid warnings use no adjustments (HN model was selected any way based on minimum AIC)
14+
sysvar_df <- ds(Systematic_variance_2,
15+
nadj = 0,
16+
convert_units=cu)
1517

1618
systematic_var2 <- Systematic_variance_2
1719
unflat <- unflatten(systematic_var2)
@@ -30,9 +32,9 @@ test_that("variance 2",{
3032
unflat$obs.table$Sample.Label <- as.numeric(unflat$obs.table$Sample.Label)
3133
unflat$data$Sample.Label <- as.numeric(unflat$data$Sample.Label)
3234

33-
# fit the detection function
34-
sysvar_df <- ds(unflat$data, adjustment="cos", convert_units=cu)
35-
35+
# fit the detection function (again avoid adjustment terms to supress warning - no adjustments is selected by AIC any way)
36+
sysvar_df <- ds(unflat$data, nadj = 0, convert_units=cu)
37+
3638
## no stratification
3739
#Nhat_nostrat_eff <- dht2(sysvar_df, transects=unflat$sample.table,
3840
# geo_strat=unflat$region.table, observations=unflat$obs.table,

0 commit comments

Comments
 (0)