Skip to content

Commit

Permalink
0.99.50.2
Browse files Browse the repository at this point in the history
  • Loading branch information
yourpresidentuniversal committed Oct 23, 2023
1 parent 81b7448 commit 01f548d
Show file tree
Hide file tree
Showing 14 changed files with 217 additions and 29 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: DescTools
Type: Package
Title: Tools for Descriptive Statistics
Version: 0.99.50.1
Version: 0.99.50.2
Date: 2023-11-06
Authors@R: c(
person("Andri", "Signorell", email = "andri@signorell.net", role = c("aut", "cre")),
Expand Down
8 changes: 4 additions & 4 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ export(
"DunnettTest","DunnTest",
"DurbinWatsonTest","Entropy","Eps","ErrBars","EtaSq","ExpFreq","Factorize","FctArgs","Fibonacci","FindColor","FindCorr","FisherZ","FisherZInv","FixToTable","Flags",
"Format","Frac","Freq", "Freq2D", "GCD","GetCurrPP","GetCurrWrd","GetCurrXL","GetNewPP","GetNewWrd","GetNewXL",
"Gini","GiniSimpson","HunterGaston","Gmean","GoodmanKruskalGamma","GoodmanKruskalTau","Gsd","GTest","Herfindahl","HexToCol","HexToDec","HexToRgb","HighLow","Hmean",
"Gini","GiniSimpson","GiniDeltas","HunterGaston","Gmean","GoodmanKruskalGamma","GoodmanKruskalTau","Gsd","GTest","Herfindahl","HexToCol","HexToDec","HexToRgb","HighLow","Hmean",
"HmsToSec","HmsToMinute","HodgesLehmann",
"HotellingsT2Test","Hour","HuberM","ICC","identify.formula","IdentifyA","Impute","InDots","Interval","IRR","IsDate","IsDichotomous","IsEuclid","IsLeapYear","IsNumeric","IsOdd","IsPrime","IsValidHwnd","IsWeekend","IsWhole","IsZero","JarqueBeraTest",
"KappaM","KendallTauB","KendallW","Keywords","KrippAlpha","Kurt",
"Label","Label<-","Unit","Unit<-","Lambda","Large","LastDayOfMonth","Lc","LCM","LehmacherTest",
"Label","Label<-","Labels","Labels<-","Unit","Unit<-","Lambda","Large","LastDayOfMonth","Lc","LCM","LehmacherTest",
"LeveneTest",
"LillieTest","lines.Lc","lines.lm","lines.loess","lines.smooth.spline","lines.SmoothSpline","LinScale",
"LOCF","LOF","Logit","LogitInv","LogSt","LogStInv","LsFct","LsObj",
Expand All @@ -31,7 +31,7 @@ export(
"NemenyiTest","Now","OctToDec","OddsRatio","OPR","OrderMixed","Outlier","Overlap",
"PageTest", "PlotProbDist",
"PairApply","ParseFormula","ParseSASDatalines","CorPart","PasswordDlg","pBenf","PearsonTest",
"PercTable",
"PercTable", "Mgsub",
"Permn","Phi","plot.bagplot","plot.Conf","plot.Lc","plot.Lclist","plot.PostHocTest","PlotACF","PlotArea","PlotBag","PlotBagPairs",
"PlotBubble","PlotCandlestick","PlotCirc","PlotCorr","ABCCoords","Bg",

Expand All @@ -57,7 +57,7 @@ export(
"axTicks.POSIXct", "print.PercTable", "PlotMosaic", "PDFManual", "LongToRgb", "PlotMiss","BreuschGodfreyTest",
"WrdParagraphFormat", "WrdParagraphFormat<-", "WrdFont", "WrdFont<-","WrdStyle", "WrdStyle<-", "WrdTableBorders",
"DigitSum",
"Range","Mean","SD","Var","Cov","Cor","Median","MAD","Quantile",
"Range","Mean","SD","Var","SDN","VarN", "EX", "VarX","Cov","Cor","Median","MAD","Quantile",
"ORToRelRisk", "Abstract", "ColumnWrap",
"WrdCellRange", "WrdMergeCells", "WrdFormatCells", "Some","PlotFdist", "plot.palette",
"Pal", "Shade", "StrExtract", "Arrow", "Asp", "PlotLog", "axTicks.Date", "LineToUser",
Expand Down
17 changes: 15 additions & 2 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,15 +1,28 @@


DescTools 0.99.50 (2023-09-02)
DescTools 0.99.51 (2023-11-02)
------------------------------

NEW FUNCTIONS ADDED:
*
* Labels() is a vectorized function for Label().
* SDN() and VarN() return the uncorrected (biased) estimators for
standard deviation and variance (sometimes needed for
didactical reasons).
* EX() and VarX() return the expected value and the variance for
the distribution of a discrete random variable.
* Mgsub() is a vectorized version for gsub, allowing to replace
multiple patterns at the same time.
* New function GiniDeltas() returns the Gini variant from Deltas.
(Credits to Wim Bernasco, https://github.com/AndriSignorell/DescTools/issues/120)


UPDATED FUNCTIONS:
* TwoGroups() gains a formula interface. Plot, print and ToWrd
routines have been better organized.

BUGFIXES:
* HunterGaston() now calculates what it always pretended to do.
(Credits to Wim Bernasco)


DescTools 0.99.50 (2023-09-02)
Expand Down
3 changes: 2 additions & 1 deletion R/Desc.R
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,8 @@ calcDesc.numeric <- function(x, n, maxrows = NULL, conf.level = 0.95,
# check for remarkably frequent values in a numeric variable
# say the most frequent value has significantly more than 5% from the total sample
modefreq_crit <-
binom.test(attr(modex, "freq"), n = n, p = 0.05, alternative = "greater")
binom.test(ZeroIfNA(attr(modex, "freq")), n = n, p = 0.05, alternative = "greater")

if (modefreq_crit$p.value < 0.05 & psum$unique > 12) {
modefreq_crit <- gettextf(
"heap(?): remarkable frequency (%s) for the mode(s) (= %s)",
Expand Down
31 changes: 31 additions & 0 deletions R/DescTools.r
Original file line number Diff line number Diff line change
Expand Up @@ -7195,6 +7195,37 @@ Label <- function(x) {
return(x)
}



`Labels<-` <- function(x, value) {
if(is.list(value)) stop("cannot assign a list to be an object label")
# if((length(value) != 1L) & !is.null(value)) stop("value must be character vector of length 1")


if(is.atomic(x)) {
DescTools::Label(x) <- value

} else {

value <- rep(value, times=length(x))

for(i in seq(x))
DescTools::Label(x[, i]) <- value[i]
}

return(x)

}

Labels <- function(x) {
if(is.atomic(x))
Label(x)
else
sapply(x, DescTools::Label)
}



# "Label<-.data.frame" <- function(x, self=(length(value)==1), ..., value) {
#
# if(!is.data.frame(x)) stop("x must be a data.frame")
Expand Down
87 changes: 72 additions & 15 deletions R/StatsAndCIs.r
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,41 @@ Cov <- cov
Cor <- cor



SDN <- function(x, na.rm = FALSE){
sd(x, na.rm=na.rm) * sqrt(((n <- sum(!is.na(x)))-1) /n)
}


VarN <- function(x, na.rm = FALSE){
var(x, na.rm=na.rm) * ((n <- sum(!is.na(x)))-1) /n
}


EX <- function(x, p) sum(x * p)

VarX <- function(x, p) sum((x - EX(x, p))^2 * p)



# multiple gsub
Mgsub <- function(pattern, replacement, x, ...) {

if (length(pattern)!=length(replacement)) {
stop("pattern and replacement do not have the same length.")
}
result <- x
for (i in 1:length(pattern)) {
result <- gsub(pattern[i], replacement[i], result, ...)
}

result

}




# Length(x)
# Table(x)
# Log(x)
Expand Down Expand Up @@ -1194,8 +1229,9 @@ Mode <- function(x, na.rm=FALSE) {
# or they've been stripped above
res <- fastModeX(x, narm=FALSE)

# no mode existing, if max freq is only 1 observation
if(length(res)== 0L & attr(res, "freq")==1L)
return(structure(NA_real_, freq = 1L))
return(structure(NA_real_, freq = NA_integer_))

else
# order results kills the attribute
Expand Down Expand Up @@ -5089,8 +5125,8 @@ predict.Lc <- function(object, newdata, conf.level=NA, general=FALSE, n=1000, ..

# recoded for better support weights 2022-09-14

Gini <- function(x, weights=NULL, unbiased=TRUE, conf.level = NA,
R = 10000, type = "bca", na.rm=FALSE) {
Gini <- function(x, weights=NULL, unbiased=TRUE,
conf.level = NA, R = 10000, type = "bca", na.rm=FALSE) {

# https://core.ac.uk/download/pdf/41339501.pdf

Expand Down Expand Up @@ -5131,15 +5167,15 @@ Gini <- function(x, weights=NULL, unbiased=TRUE, conf.level = NA,
if (is.na(conf.level)) {
res <- i.gini(x, weights, unbiased = unbiased)

}
else {
} else {

boot.gini <- boot(data = x,
statistic = function(z, i, u, unbiased)
i.gini(x = z[i], w = u[i], unbiased = unbiased),
R=R, u=weights, unbiased=unbiased)
ci <- boot.ci(boot.gini, conf = conf.level, type = type)
res <- c(gini = boot.gini$t0, lwr.ci = ci[[4]][4], upr.ci = ci[[4]][5])

}

return(res)
Expand Down Expand Up @@ -5174,22 +5210,43 @@ GiniSimpson <- function(x, na.rm = FALSE) {
}




HunterGaston <- function(x, na.rm = FALSE){

# we must restrict to x as factors here to ensure we have all the levels
# these are used in length(p)

if(!is.factor(x)){
GiniDeltas <- function (x, na.rm = FALSE) {

# Deltas (2003, DOI:10.1162/rest.2003.85.1.226).

if (!is.factor(x)) {
warning("x is not a factor!")
return(NA)
}
if(na.rm) x <- na.omit(x)
if (na.rm)
x <- na.omit(x)

p <- prop.table(table(x))
sum(p*(1-p)) * length(p)/(length(p)-1)
sum(p * (1 - p)) * length(p)/(length(p) - 1)

}



HunterGaston <- function(x, na.rm = FALSE){

# Hunter-Gaston index (Hunter & Gaston, 1988, DOI:10.1128/jcm.26.11.2465-2466.1988)
# Credits to Wim Bernasco
# https://github.com/AndriSignorell/DescTools/issues/120

# see: vegan::simpson.unb(BCI)


if (is.factor(x) | is.character(x)) {
if (na.rm)
x <- na.omit(x)
tt <- table(x)
} else {
tt <- x
}

sum(tt * (tt - 1)) / (sum(tt) * (sum(tt) - 1))

}


Expand Down
33 changes: 33 additions & 0 deletions man/EX.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
\name{EX}
\alias{EX}
\alias{VarX}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{Expected Value and Variance
%% ~~function to do ... ~~
}
\description{Expected Value and Variance for the distribution of a discrete random variable.
(For didactical purposes..)
%% ~~ A concise (1-5 lines) description of what the function does. ~~
}
\usage{
EX(x, p)
VarX(x, p)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{x}{the values of the random variable
%% ~~Describe \code{x} here~~
}
\item{p}{the probabilities of the values
%% ~~Describe \code{p} here~~
}
}
\value{numeric value
}
\author{Andri Signorell <andri@signorell.net>
%% ~~who you are~~
}
\examples{
EX(x=c(1:3), p=c(0.2, 0.5, 0.3))
VarX(x=c(1:3), p=c(0.2, 0.5, 0.3))
}
10 changes: 7 additions & 3 deletions man/GiniSimpson.Rd
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
\name{GiniSimpson}
\alias{GiniSimpson}
\alias{GiniDeltas}

\alias{HunterGaston}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{Gini-Simpson Coefficient and Hunter-Gaston Index
\title{Gini-Simpson Coefficient, Gini-Deltas coefficient and Hunter-Gaston Index
%% ~~function to do ... ~~
}
\description{Calculate the Gini-Simpson coefficient and the Hunter-Gaston Index.
\description{Calculate the Gini-Simpson coefficient, the Gini variant proposed by Deltas and the Hunter-Gaston Index.
%% ~~ A concise (1-5 lines) description of what the function does. ~~
}
\usage{
GiniSimpson(x, na.rm = FALSE)
GiniDeltas(x, na.rm = FALSE)

HunterGaston(x, na.rm = FALSE)
}
Expand All @@ -30,10 +33,11 @@ This measure is also known in ecology as the probability of interspecific encoun
}
\references{
Cover Thomas M. and Thomas Joy A. (1991) \emph{Elements of Information Theory}. Wiley.

Hunter, P., Gaston, A. G. (1988) Numerical Index of the Discriminatory Ability of Typing Systems: an Application of Simpson's Index of Diversity, \emph{JOURNAL OF CLINICAL MICROBIOLOGY}, Nov. 1988, p. 2465-2466, 0095-1137/88/112465-02$02.00/0
Deltas (2003) DOI:10.1162/rest.2003.85.1.226.
%% ~put references to the literature/web site here ~
}
\author{Andri Signorell <andri@signorell.net>
%% ~~who you are~~
Expand Down
7 changes: 7 additions & 0 deletions man/Label.Rd
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
\name{Label, Unit}
\alias{Label}
\alias{Label<-}
\alias{Labels}
\alias{Labels<-}
\alias{Unit}
\alias{Unit<-}

Expand All @@ -14,6 +16,9 @@
Label(x)
Label(x) <- value

Labels(x)
Labels(x) <- value

Unit(x)
Unit(x) <- value

Expand All @@ -30,6 +35,8 @@ Unit(x) <- value

\details{The label should consist of a single text (length of 1). The text may contain line feeds.
It can be deleted by setting the label to \code{NULL}.

\code{Labels()} can be used to retrieve and assign vectorized labels to data.frames or lists.
}
\value{
\code{Label} and \code{Unit} return the label attribute of x, if any; otherwise, NULL.
Expand Down
35 changes: 35 additions & 0 deletions man/Mgsub.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
\name{Mgsub}
\alias{Mgsub}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{Multiple Gsub
%% ~~function to do ... ~~
}
\description{Performs multiple substitions in (a) string(s).
%% ~~ A concise (1-5 lines) description of what the function does. ~~
}
\usage{
Mgsub(pattern, replacement, x, ...)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{pattern}{character string containing a regular expression (or character string for fixed = TRUE) to be matched in the given character vector. Coerced by as.character to a character string if possible.
}
\item{replacement}{a replacement for matched pattern as in \code{\link{sub}} and \code{\link{gsub}}.
See there for more information.
}
\item{x}{a character vector where matches are sought, or an object which can be coerced by as.character to a character vector. Long vectors are supported.
}
\item{\dots}{all dots are passed on to gsub.
}
}
\value{
a character vector of the same length and with the same attributes as x (after possible coercion to character).
}
\author{Andri Signorell <andri@signorell.net>
}
\seealso{\code{\link{gsub}}
}
\examples{
x <- c("ABC", "BCD", "CDE")
Mgsub(pattern=c("B", "C"), replacement=c("X","Y"), x)
}
Loading

0 comments on commit 01f548d

Please sign in to comment.