Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
19f14e0
refactor UI
daattali Sep 19, 2025
114b030
pimp up the UI
daattali Sep 19, 2025
61ad146
fix a bug in code
daattali Sep 19, 2025
3973d18
dont kill all sessions when one user clicks the Exit button, only kil…
daattali Sep 20, 2025
794a407
if a user stops the app, close the window
daattali Sep 20, 2025
27938d7
simplify if code
daattali Sep 20, 2025
eb64c8b
remove duplicated useless dynamic UI code
daattali Sep 20, 2025
5280038
small simplication to UI code
daattali Sep 20, 2025
fa8b7aa
move non shiny code into separate R files for better organization
daattali Sep 20, 2025
0762fa3
fix bug where .xls files would kill the app
daattali Sep 20, 2025
55c9a9c
refactor data intake code to simplify it, remove unneeded reactiveVal…
daattali Sep 20, 2025
dab4107
remove unneeded code relating to the comments log
daattali Sep 20, 2025
a277b18
dont show comments log when there is no text
daattali Sep 20, 2025
f247282
simplification: use a reactive variable instead of reactiveVal, decou…
daattali Sep 20, 2025
44cef79
simplify server code - move logic for reading file out of shiny, this…
daattali Sep 20, 2025
fa4e3a7
simplify the main data validation code, don't use any super/global as…
daattali Sep 21, 2025
92ff7aa
remove the last global assignment, that was causing the "download res…
daattali Sep 21, 2025
4c0bdab
simplify code for download button
daattali Sep 21, 2025
35d3ba2
move data validation function to its own file, out of server logic
daattali Sep 21, 2025
d4161e6
complete refactor of data validation code to break it into multiple c…
daattali Sep 22, 2025
c070ffb
when validation error occurs, show message instead of killing app
daattali Sep 22, 2025
6d0ff91
add tests
daattali Sep 22, 2025
28e7988
rename m to MONTE_CARLO_SIM_rEPS
daattali Sep 22, 2025
6cf8096
significantly speed up data validation (One Sheet Carlisle xlsx takes…
daattali Sep 23, 2025
9da8236
simplify code, dont' use any reactiveVals
daattali Sep 23, 2025
ea834e5
remove all the unneeded parallelization code
daattali Sep 24, 2025
5324348
set the stage to be able to have the main loop run either in parallel…
daattali Sep 24, 2025
a3760be
introduce a boolean flag in global.R RUN_PARALLEL to decide whether t…
daattali Sep 24, 2025
cb84176
Delete tests directory
daattali Oct 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata
*.Rproj
.Renviron
104 changes: 21 additions & 83 deletions Global.R
Original file line number Diff line number Diff line change
@@ -1,108 +1,46 @@
# Global.R
library(shiny)
library(openxlsx)
library(readxl)
library(metap)
library(Rfast)
library(shinyjs)
library(rsconnect)
library(shinyWidgets)
#library(mirai)
#library(promises)
library(future)
library(foreach)
library(doParallel)
library(MBESS)
library(dqrng)
library(OpenMx)
library(digitTests)
library(rsconnect)
library(bslib)
library(shinydashboard)
library(future)
library(foreach)
library(doFuture)

registerDoFuture(flavor = "%dofuture%")
plan(multisession)

#library(BiocManager)
#BiocManager::install("multtest")

remove(list=ls())

DATA <- NULL
Results <- NULL
LineNumber <- 1
TRIALS <- NULL
LengthTrials <- NULL
priorMessage <- NULL
ColumnNames <- NULL
CategoryNames <- NULL

# m is the replication number for the Monte Carlo simulation
m <- 100000
m <- 15000
RUN_PARALLEL <- FALSE

if (RUN_PARALLEL) {
calculate_workers <- function() {
total_cores <- parallelly::availableCores()

############################################################################
# References #
# Carlisle JB. The analysis of 168 randomised controlled trials to test #
# data integrity. Anaesthesia. 2012;67:521-537. #
# #
# Carlisle JB, Dexter F, Pandit JJ, Shafer SL, Yentis SM. Calculating the #
# probability of random sampling for continuous variables in submitted or #
# published randomised controlled trials. Anaesthesia. 2015;70:848-58. #
# #
# Carlisle JB. Data fabrication and other reasons for non-random sampling #
# in 5087 randomised, controlled trials in anaesthetic and general medical #
# journals. Anaesthesia. 2017;72:944-952 #
############################################################################

is_local <- Sys.getenv('SHINY_PORT') == ""
if (is_local)
setwd("g:/projects/Fraud/2025")

outputComments <- function(
...,
echo = getOption("ECHO_OUTPUT_COMMENTS", TRUE),
sep = " ")
{
isolate({
argslist <- list(...)
if (length(argslist) == 1) {
text <- argslist[[1]]
if (total_cores <= 2) {
return(1)
} else if (total_cores <= 4) {
return(total_cores - 1)
} else if (total_cores <= 8) {
return(total_cores - 2)
} else {
text <- paste(argslist, collapse = sep)
return(ceiling(total_cores * 0.6))
}

# If this is called within a shiny app, try to get the active session
# and write to the session's logger
commentsLog <- function(x) invisible(NULL)
session <- getDefaultReactiveDomain()
if (!is.null(session) &&
is.environment(session$userData) &&
is.reactive(session$userData$commentsLog))
{
commentsLog <- session$userData$commentsLog
}

if (is.na(echo)) return()
if (is.data.frame((text)))
{
con <- textConnection("outputString","w",local=TRUE)
capture.output(print(text, digits = 3), file = con, type="output", split = FALSE)
close(con)
if (echo)
{
for (line in outputString) cat(line, "\n")
}
for (line in outputString) commentsLog(paste0(commentsLog(), "<br>", line))
} else {
if (echo)
{
cat(text, "\n")
}
commentsLog(paste0(commentsLog(), "<br>", text))
}
})
}

plan(multisession, workers = calculate_workers())
}

# replication number for the Monte Carlo simulation
MONTE_CARLO_SIM_REPS <- 15000

COMMON_COL_NAMES <- c("TRIAL", "ROW", "N", "MEAN", "SD", "ROUND_MEAN", "ROUND_OBSERVATION")
COLS_CONTINUOUS <- c("N", "MEAN", "SD")
Binary file removed Integrity Analysis.docx
Binary file not shown.
151 changes: 151 additions & 0 deletions R/p_calc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
# Primary Statistical Function for Monte Carlo Simulation
P_Calc <- function(trial_data, CategoryNames)
{
data <- trial_data
TRIAL <- trial_data$TRIAL[1]
RowIDs <- unique(data$ROW)

x <- foreach(
j = 1:length(RowIDs),
.combine = rbind
) %do%
{
Row <- RowIDs[j]
ROWS <- data[data$ROW == Row,]

# Greater than 1 line?
if (nrow(ROWS) > 1)
{
# Is this categorical?
if (all(!is.na(ROWS$N)))
{
COLS <- nrow(ROWS)
N <- sum(ROWS$N)
Meanmean <- sum(ROWS$N*ROWS$MEAN) / N
# The calculation of Meanvar is OK. SD^2 is an unbiased estimate
# of variance
Meanvar <- sum(ROWS$N*ROWS$SD^2) / N

# However, this next calculatiion is biased. s.u. will correct it
# If N > 30, then the correction is < 1 %. It blows up if N > 343!
if (N < 30)
{
Meansd <- s.u(sqrt(Meanvar), N)
} else {
Meansd <- sqrt(Meanvar)
}
# Protect size of simulation
if ((MONTE_CARLO_SIM_REPS*N) < 1000000000) # One billion
{
m1 <- MONTE_CARLO_SIM_REPS
} else {
m1 <- 1000000000 / N
}
SEMsample <- Meansd/sqrt(mean(ROWS$N))
DiffSample <- sum((ROWS$MEAN - Meanmean)^2) # Squared difference of column means
# Monte Carlo Simulation
meansim <- dqrnorm(MONTE_CARLO_SIM_REPS,mean=Meanmean,sd=SEMsample) # Generate a new mean for each simulation
MonteCarloMean <- matrix(NA, nrow = m1, ncol = COLS) # I want one row for each simulation
# Need to do each column separately. Couldn't think of an efficient way to do this without
# a loop.
for (i in 1:COLS)
MonteCarloMean[,i] <-
round(
rowmeans(
round(
# The matrix below will have one row for each replication (m rows),
# and one column for each person (N[i] columns)
# Cannot use dqrnorm because it won't support the array
# of meansim needed for each replication
matrix(
rnorm(ROWS$N[i] * m1, rep(meansim, ROWS$N[i]), Meansd),
nrow = m1, byrow = FALSE
),
ROWS$ROUND_OBSERVATION[i]
)
),
ROWS$ROUND_OBSERVATION[i]
)
N <- matrix(ROWS$N, nrow = m1, ncol = COLS, byrow = TRUE)
# Calculate the weighted mean, and then round
MeanSamples <- rowsums (MonteCarloMean * N) / sum(ROWS$N)
DiffSamples <- rowsums((MonteCarloMean - MeanSamples)^2)

PEQ <- sum(DiffSamples == DiffSample) / m1
PLE <- sum(DiffSamples < DiffSample)/m1 + PEQ
PGE <- sum(DiffSamples > DiffSample)/m1 + PEQ
} else {
ROWS <- ROWS[,CategoryNames]
for (NAME in CategoryNames)
{
if (all(is.na(ROWS[,NAME])))
ROWS[,NAME] <- NULL
}
PLE <- chisq.test(ROWS, simulate.p.value=MONTE_CARLO_SIM_REPS)$p.value
PGE <- 1-PLE
}
# Need to be sure P != 0 or 1
if(PLE == 1) PLE <- 0.999
if(PLE == 0) PLE <- 0.001
if(PGE == 1) PGE <- 0.999
if(PGE == 0) PGE <- 0.001
PLE = as.character(signif(PLE,4))
PGE = as.character(signif(PGE, 4))
} else {
PLE = "Only 1 Row"
PGE = NA
}
c(as.character(Row), PLE, PGE)
} %seed% TRUE

# This bizarre code is because if there is only 1 row, R creates a data.frame
# with 3 columns and 1 row.
if (length(x) == 3)
{
x <- as.data.frame(t(x))
} else {
x <- as.data.frame(x)
}

x <- cbind(NA, x)
x[1,1] <- TRIAL
x <- as.data.frame(x)
names(x) <- c("TRIAL", "ROW", "PLE", "PGE")
cat("Row IDs", RowIDs, "\n")
#print(x)
cat("match results", match(x$ROW, RowIDs), "\n")
x <- x[match(x$ROW, RowIDs),]
#print(x)

PLEvalues <- as.numeric(x$PLE)
PGEvalues <- as.numeric(x$PGE)

PLEvalues <- PLEvalues[!is.na(PLEvalues)]
PGEvalues <- PGEvalues[!is.na(PGEvalues)]

if (length(PLEvalues) > 1) {
PLE <- signif(sumz(PLEvalues)$p, 4)
} else if (length(PLEvalues) == 1) {
PLE <- PLEvalues
} else if (length(PLEvalues) == 0) {
PLE <- "No values"
}

if (length(PGEvalues) > 1) {
PGE <- signif(sumz(PGEvalues)$p,4)
} else if (length(PGEvalues) == 1) {
PGE <- PGEvalues
} else if (length(PGEvalues) == 0) {
PGE <- "No values"
}

lastline <- data.frame(
TRIAL = c(NA, NA),
ROW = c("Summary", NA),
PLE = c(as.character(PLE), NA),
PGE = c(as.character(PGE), NA)
)

x <- rbind(x, lastline)
return(x)
}
88 changes: 88 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
read_input_file <- function(filepath) {
ext <- tools::file_ext(filepath)

result <- tryCatch({
if (ext == "csv") {
read.csv(filepath)
} else if (ext == "xlsx") {
readxl::read_xlsx(filepath)
} else if (ext == "xls") {
readxl::read_xls(filepath)
} else {
stop(".", ext, " is not a supported file type")
}
}, error = function(err) {
stop("Error reading file: ", err$message)
})

if (is.data.frame(result) && nrow(result) == 0) {
stop("File does not contain any data")
}

result
}

is_category <- function(x) {
# Remove NAs first for efficiency, then check if all values are integers

# If there are no na values, then it can't be a category
if (!any(is.na(x)))
return(FALSE)

# If the vector is empty after removing NAs then it is not a category
x_clean <- x[!is.na(x)]
if (length(x_clean) == 0)
return(FALSE)

# if there are non-numeric values, it is not a category
if (!is.numeric(x_clean))
return(FALSE)

# Check if all values are equal to their integer representation
all(x_clean == as.integer(x_clean))
}

outputComments <- function(
...,
echo = getOption("ECHO_OUTPUT_COMMENTS", TRUE),
sep = " ")
{
isolate({
argslist <- list(...)
if (length(argslist) == 1) {
text <- argslist[[1]]
} else {
text <- paste(argslist, collapse = sep)
}

# If this is called within a shiny app, try to get the active session
# and write to the session's logger
commentsLog <- function(x) invisible(NULL)
session <- getDefaultReactiveDomain()
if (!is.null(session) &&
is.environment(session$userData) &&
is.reactive(session$userData$commentsLog))
{
commentsLog <- session$userData$commentsLog
}

if (is.na(echo)) return()
if (is.data.frame((text)))
{
con <- textConnection("outputString","w",local=TRUE)
capture.output(print(text, digits = 3), file = con, type="output", split = FALSE)
close(con)
if (echo)
{
for (line in outputString) cat(line, "\n")
}
for (line in outputString) commentsLog(paste0(commentsLog(), "\n", line))
} else {
if (echo)
{
cat(text, "\n")
}
commentsLog(paste0(commentsLog(), "<br>", text))
}
})
}
Loading