Skip to content

Commit

Permalink
0.99.50
Browse files Browse the repository at this point in the history
  • Loading branch information
yourpresidentuniversal committed Sep 6, 2023
1 parent 214a661 commit 0904517
Show file tree
Hide file tree
Showing 34 changed files with 360 additions and 89 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: DescTools
Type: Package
Title: Tools for Descriptive Statistics
Version: 0.99.49.1
Date: 2023-04-30
Version: 0.99.50
Date: 2023-09-06
Authors@R: c(
person("Andri", "Signorell", email = "andri@signorell.net", role = c("aut", "cre")),
person("Ken" , "Aho", role = c("ctb")),
Expand Down
4 changes: 2 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ export(
"Clockwise","Closest","Coalesce","CourseData",
"CochranArmitageTest",
"CochranQTest",
"CoefVar","CohenD","CohenKappa","CollapseTable","ColorLegend",
"CoefVar","CoefVarCI","CohenD","CohenKappa","CollapseTable","ColorLegend",
"ColToGray","ColToGrey","ColToHex","ColToHsv","ColToRgb","ColToOpaque","RgbToHex",
"CombN","CombPairs","CombSet","ConDisPairs","Conf","ConnLines","ContCoef","Contrasts","CorCI",
"CorPolychor","CramerV","CramerVonMisesTest","CronbachAlpha","CutQ","Day","Day<-","dBenf",
Expand Down Expand Up @@ -89,7 +89,7 @@ importFrom("stats", ".nknots.smspl", "IQR", "acf", "addmargins", "anova", "aov",
"uniroot", "var", "weighted.mean", "weights", "wilcox.test", "xtabs", "as.dendrogram", "dist", "hclust", "order.dendrogram", "smooth.spline","var.test",
"median", "reorder","AIC", "BIC", "confint", "deviance", "df", "logLik","lm.wfit","profile",
"coef", "coefficients", "cov2cor", "df.residual","nobs", "vcov","update","relevel","rbeta", "rexp", "qexp","reshape",
"confint.default", "ecdf", "glm", "reformulate", "aggregate")
"confint.default", "ecdf", "glm", "reformulate", "aggregate", "dchisq")

importFrom("data.table", "frankv", "frank")

Expand Down
24 changes: 22 additions & 2 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,14 +1,32 @@


DescTools 0.99.49 (2023-04-28)

DescTools 0.99.50 (2023-09-02)
------------------------------

NEW FUNCTIONS ADDED:
* The confidence intervals for the coefficient of variation are calculated
by the new function CoefVar(). Additional methods have been implemented.
So far these are: "vangel","mckay","nct","naive".

UPDATED FUNCTIONS:
* Mode() returns NA for a vector of length = 1.
* CoefVar() confidence intervals have been organised in a new function.
* DFLOAT replaced by DBLE everywhere in Fortran code.





DescTools 0.99.49 (2023-05-14)
------------------------------

UPDATED FUNCTIONS:
* Better solution for the set.seed issue in DunnetTest
(Credits to Michael Chirico in
https://github.com/AndriSignorell/DescTools/pull/102)

* BoxedText() uses xy.coords now.
* BoxedText() supports xy.coords now.
* Two new methods for MultinomCI() have been implemented
(Fitzpatrick-Scott and Quesensberry-Hurst).
* BinomCI() no longer reports p_tilde but the usual x/n as
Expand All @@ -17,6 +35,8 @@ UPDATED FUNCTIONS:
which the method specific non-standard point estimator can be returned.
(Thanks to julienbio99, MrJerryTAO and Vilmantas for the
research and discussion)
* VarTest() gets a more exact calculation for twosided p values
in the case of the one sample test.

BUGFIXES:
* MultinomCI method goodman uses the correct chisquare quantile now.
Expand Down
223 changes: 176 additions & 47 deletions R/StatsAndCIs.r
Original file line number Diff line number Diff line change
Expand Up @@ -1186,7 +1186,9 @@ Mode <- function(x, na.rm=FALSE) {

if(length(x) == 1L)
# only one value in x, x is the mode
return(structure(x, freq = 1L))
# return(structure(x, freq = 1L))
# changed to: only one value in x, no mode defined
return(structure(NA_real_, freq = NA_integer_))

# we don't have NAs so far, either there were then we've already stopped
# or they've been stripped above
Expand Down Expand Up @@ -4476,6 +4478,24 @@ CohenD <- function(x, y=NULL, pooled = TRUE, correct = FALSE, conf.level = NA, n
}




# find non-centrality parameter for the F-distribution
ncparamF <- function(type1, type2, nu1, nu2) {

# author Ali Baharev <ali.baharev at gmail.com>

# Returns the noncentrality parameter of the noncentral F distribution
# if probability of Type I and Type II error, degrees of freedom of the
# numerator and the denominator in the F test statistics are given.


.C("fpow", PACKAGE = "DescTools", as.double(type1), as.double(type2),
as.double(nu1), as.double(nu2), lambda=double(1))$lambda
}



.nctCI <- function(t, df, conf) {

alpha <- 1 - conf
Expand Down Expand Up @@ -4574,7 +4594,7 @@ CoefVar <- function (x, ...) {



CoefVar.lm <- function (x, unbiased = FALSE, conf.level = NA, na.rm = FALSE, ...) {
CoefVar.lm <- function (x, unbiased = FALSE, na.rm = FALSE, ...) {

# source: http://www.ats.ucla.edu/stat/mult_pkg/faq/general/coefficient_of_variation.htm

Expand All @@ -4592,11 +4612,12 @@ CoefVar.lm <- function (x, unbiased = FALSE, conf.level = NA, na.rm = FALSE, ...
res <- res * ((1 - (1/(4 * (n - 1))) + (1/n) * res^2) +
(1/(2 * (n - 1)^2)))
}
if (!is.na(conf.level)) {
ci <- .nctCI(sqrt(n)/res, df = n - 1, conf = conf.level)
res <- c(est = res, low.ci = unname(sqrt(n)/ci["upr.ci"]),
upr.ci = unname(sqrt(n)/ci["lwr.ci"]))
}

# if (!is.na(conf.level)) {
# ci <- .nctCI(sqrt(n)/res, df = n - 1, conf = conf.level)
# res <- c(est = res, low.ci = unname(sqrt(n)/ci["upr.ci"]),
# upr.ci = unname(sqrt(n)/ci["lwr.ci"]))
# }

return(res)

Expand All @@ -4607,41 +4628,141 @@ CoefVar.lm <- function (x, unbiased = FALSE, conf.level = NA, na.rm = FALSE, ...
# dv <- unname(nlme::getResponse(x))


CoefVar.default <- function (x, weights=NULL, unbiased = FALSE, conf.level = NA, na.rm = FALSE, ...) {
# CoefVar.default <- function (x, weights=NULL, unbiased = FALSE, conf.level = NA, na.rm = FALSE, ...) {
#
# if(is.null(weights)){
# if(na.rm) x <- na.omit(x)
# res <- SD(x) / Mean(x)
# n <- length(x)
#
# }
# else {
# res <- SD(x, weights = weights) / Mean(x, weights = weights)
# n <- sum(weights)
#
# }
#
# if(unbiased) {
# res <- res * ((1 - (1/(4*(n-1))) + (1/n) * res^2)+(1/(2*(n-1)^2)))
# }
#
# if(!is.na(conf.level)){
# ci <- .nctCI(sqrt(n)/res, df = n-1, conf = conf.level)
# res <- c(est=res, low.ci= unname(sqrt(n)/ci["upr.ci"]), upr.ci= unname(sqrt(n)/ci["lwr.ci"]))
# }
#
# return(res)
#
# }

if(is.null(weights)){
if(na.rm) x <- na.omit(x)
res <- SD(x) / Mean(x)
n <- length(x)


CoefVarCI <- function (K, n, conf.level = 0.95,
sides = c("two.sided", "left", "right"),
method = c("nct","vangel","mckay","verrill","naive")) {

# Description of confidence intervals
# https://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/coefvacl.htm


.iCoefVarCI <- Vectorize(function(K, n, conf.level=0.95,
sides = c("two.sided", "left", "right"),
method = c("vangel","mckay","verrill","nct","naive")) {

}
else {
res <- SD(x, weights = weights) / Mean(x, weights = weights)
n <- sum(weights)
method <- match.arg(method)
sides <- match.arg(sides, choices = c("two.sided", "left", "right"),
several.ok = FALSE)

}
# double alpha in case of one-sided intervals in order to be able
# to generally calculate twosided intervals and select afterwards..
if (sides != "two.sided")
conf.level <- 1 - 2 * (1 - conf.level)

alpha <- 1 - conf.level

df <- n - 1
u1 <- qchisq(1-alpha/2, df)
u2 <- qchisq(alpha/2, df)

switch(method, verrill = {
CI.lower <- 0
CI.upper <- 1

}, vangel = {
CI.lower <- K / sqrt(((u1+2)/n - 1) * K^2 + u1/df)
CI.upper <- K / sqrt(((u2+2)/n - 1) * K^2 + u2/df)

}, mckay = {
CI.lower <- K / sqrt((u1/n - 1) * K^2 + u1/df)
CI.upper <- K / sqrt((u2/n - 1) * K^2 + u2/df)

}, nct = {
ci <- .nctCI(sqrt(n)/K, df = df, conf = conf.level)
CI.lower <- unname(sqrt(n)/ci[2])
CI.upper <- unname(sqrt(n)/ci[1])

}, naive = {
CI.lower <- K * sqrt(df / u1)
CI.upper <- K * sqrt(df / u2)
}
)

ci <- c(est = K,
lwr.ci = CI.lower, # max(0, CI.lower),
upr.ci = CI.upper) # min(1, CI.upper))

if (sides == "left")
ci[3] <- Inf

else if (sides == "right")
ci[2] <- -Inf

return(ci)

})


if(unbiased) {
res <- res * ((1 - (1/(4*(n-1))) + (1/n) * res^2)+(1/(2*(n-1)^2)))
}
sides <- match.arg(sides)
method <- match.arg(method)

res <- t(.iCoefVarCI(K=K, n=n, method=method, sides=sides, conf.level = conf.level))

if(!is.na(conf.level)){
ci <- .nctCI(sqrt(n)/res, df = n-1, conf = conf.level)
res <- c(est=res, low.ci= unname(sqrt(n)/ci["upr.ci"]), upr.ci= unname(sqrt(n)/ci["lwr.ci"]))
return(res)

}


CoefVar.default <- function (x, weights = NULL, unbiased = FALSE,
na.rm = FALSE, ...) {


if (is.null(weights)) {
if (na.rm)
x <- na.omit(x)
res <- SD(x)/Mean(x)
n <- length(x)

} else {
res <- SD(x, weights = weights)/Mean(x, weights = weights)
n <- sum(weights)
}

if (unbiased) {
res <- res * ((1 - (1/(4 * (n - 1))) + (1/n) * res^2) + (1/(2 * (n - 1)^2)))
}

return(res)

}


# aus agricolae: Variations Koeffizient aus aov objekt
#
# CoefVar.aov <- function(x){
# return(sqrt(sum(x$residual^2) / x$df.residual) / mean(x$fitted.values))
# }


CoefVar.aov <- function (x, unbiased = FALSE, conf.level = NA, na.rm = FALSE, ...) {
CoefVar.aov <- function (x, unbiased = FALSE, na.rm = FALSE, ...) {

# source: http://www.ats.ucla.edu/stat/mult_pkg/faq/general/coefficient_of_variation.htm

Expand All @@ -4659,11 +4780,12 @@ CoefVar.aov <- function (x, unbiased = FALSE, conf.level = NA, na.rm = FALSE, ..
res <- res * ((1 - (1/(4 * (n - 1))) + (1/n) * res^2) +
(1/(2 * (n - 1)^2)))
}
if (!is.na(conf.level)) {
ci <- .nctCI(sqrt(n)/res, df = n - 1, conf = conf.level)
res <- c(est = res, low.ci = unname(sqrt(n)/ci["upr.ci"]),
upr.ci = unname(sqrt(n)/ci["lwr.ci"]))
}

# if (!is.na(conf.level)) {
# ci <- .nctCI(sqrt(n)/res, df = n - 1, conf = conf.level)
# res <- c(est = res, low.ci = unname(sqrt(n)/ci["upr.ci"]),
# upr.ci = unname(sqrt(n)/ci["lwr.ci"]))
# }

return(res)

Expand Down Expand Up @@ -5236,6 +5358,24 @@ ContCoef <- function(x, y = NULL, correct = FALSE, ...) {
}



.ncchisqCI <- function(chisq, df, conf) {

alpha <- 1 - conf
probs <- c(alpha/2, 1 - alpha/2)

ncp <- suppressWarnings(optim(par = 1.1 * rep(chisq, 2), fn = function(x) {
p <- pchisq(q = chisq, df = df, ncp = x)
abs(max(p) - probs[2]) + abs(min(p) - probs[1])
}, control = list(abstol = 0.000000001)))

chisq_ncp <- unname(sort(ncp$par))

return(chisq_ncp)

}


CramerV <- function(x, y = NULL, conf.level = NA,
method = c("ncchisq", "ncchisqadj", "fisher", "fisheradj"),
correct=FALSE, ...){
Expand Down Expand Up @@ -5376,19 +5516,6 @@ CramerV <- function(x, y = NULL, conf.level = NA,



ncparamF <- function(type1, type2, nu1, nu2) {

# author Ali Baharev <ali.baharev at gmail.com>

# Returns the noncentrality parameter of the noncentral F distribution
# if probability of Type I and Type II error, degrees of freedom of the
# numerator and the denominator in the F test statistics are given.


.C("fpow", PACKAGE = "DescTools", as.double(type1), as.double(type2),
as.double(nu1), as.double(nu2), lambda=double(1))$lambda
}



YuleQ <- function(x, y = NULL, ...){
Expand Down Expand Up @@ -8180,8 +8307,10 @@ print.HoeffD <- function(x, ...)
}


# find non-centrality parameter for the F-distribution
ncparamF <- function(type1, type2, nu1, nu2){
.C("fpow", PACKAGE = "DescTools", as.double(type1), as.double(type2), as.double(nu1), as.double(nu2), lambda=double(1))$lambda
}







Loading

0 comments on commit 0904517

Please sign in to comment.