# Date: 26-02-2021
#**************************#
# all custom functions #
#**************************#
#' These are all my custom R functions!
#*********************************************************************************
# EMPTY CONSOLE ####
#*********************************************************************************
lllllllll <- function(){cat(rep("\n", 50))}
#*********************************************************************************
# SET UP GRAPHIC DEVICE ####
#*********************************************************************************
parSave <- function(){
parT <- par(no.readonly=TRUE)
parT$pin <- NULL # Do not want to save this, can only lead to problems when trying to reset to that (perhaps some other vars also)
return(parT)
}
parReset <- function(parOld, split=FALSE){
par(parOld)
if(split){par(mfrow=c(2,2))}
}
parMarModify <- function(f=0.3, reduce=FALSE){
stopifnot(0<f)
if(reduce){
stopifnot(f<=1)
parM <- par()$mar
M.d <- parM*f
par(mar=parM-M.d)
}else{
parM <- par()$mar
M.d <- parM*f
par(mar=parM+M.d)
}
}
setUpGraph <- function(row=2, col=2){
oldPar <- parSave() # In case I want to automatically save my par setup
dev.new()
par(mfrow=c(row,col))
invisible(oldPar)
}
#*********************************************************************************
# ASSIGN ARGUMENTS OF FUNCTION TO WORK INSIDE FUNCTION ####
#*********************************************************************************
asArguments <- function(...){
argmns <- list(...)
list2env(argmns, envir = .GlobalEnv) # Add arguments to global environment
invisible() # So that no output is printed
}
#*********************************************************************************
# NICE UNIVARIATE PLOT ####
#*********************************************************************************
niceUnivPlot <- function(numVar, catVar=NULL, pairedVar=NULL, violin=TRUE, fxdCol=NULL,
showMean=TRUE, plot.points=TRUE, bw='nrd0', jitFactor=0.2,
add.ylim=0, ylim.cust=NULL, xlab=NULL, ylab=NULL, densScl=0.5,
main=NULL, sigGroup=FALSE, sigMu=NULL, multCmp=FALSE,
pairCol=NULL, add.lgnd=FALSE, add=FALSE, lnk.means=NULL,
lnk.means.lwd=2, pair.lwd=2, point.ColPalette=NULL, bgalph=100, ...){
#*********************************************************************************
# CHECK REQUIREMENTS AND PREPARE SOME ARGUMENTS ####
#*********************************************************************************
### Set important names:
catVar.nm <- ''
numVar.nm <- deparse(substitute(numVar))
### Check if input is matrix-like object:
if(inherits(numVar, what = c('matrix', 'data.frame'))){
### Check some requirements:
if(!is.null(catVar)){stop("catVar should not be specified when numVar is supplied as a table")}
### Remove non-numeric columns:
fac_id <- sapply(numVar, inherits, what=c('character', 'factor'))
if(any(fac_id)){
numVar <- numVar[, !fac_id, drop=FALSE]
warning(paste0(sum(fac_id), ' categorical variables were removed for plotting.'))
}
### Store colnames:
colnms <- colnames(numVar)
### Check if pairedVar is supplied:
if(!is.null(pairedVar)){
stopifnot(length(pairedVar)==nrow(numVar)) # Must be same length
### Get into wide format:
laps <- lapply(colnms, function(x){data.frame(prVar=pairedVar, value=numVar[,x])})
### Extract pairedVar:
pairedVar <- do.call(rbind, laps)$prVar
### Check if pairedCol is supplied as vector:
if(length(pairCol) > 1){
### Check if correct length:
stopifnot(length(pairCol)==nrow(numVar))
### Get into wide format:
laps <- lapply(colnms, function(x){data.frame(prCol=pairCol, value=numVar[,x])})
### Extract pairCol:
pairCol <- do.call(rbind, laps)$prCol
}
}
### Get to "wide" format:
laps <- lapply(colnms, function(x){data.frame(value=numVar[,x], varNm=x)})
### Extract num and catVar:
numVar <- do.call(rbind, laps)$value
catVar <- factor(do.call(rbind, laps)$varNm, levels=colnms)
} else{ # In case not a table is supplied as numVar
### Check some requirements:
stopifnot(is.numeric(numVar))
stopifnot(is.null(ylim.cust) | length(ylim.cust)==2) # Check valid entries for ylim.cust
### Generate catvar:
catVar.nm <- deparse(substitute(catVar)) # Get name of catVar
if(is.null(catVar)){
catVar <- factor(rep(1,length(numVar))) # 1-level factor
}else{
stopifnot(length(catVar)==length(numVar)) # Make sure lengths are same
catVar <- factor(catVar) # Dont use as.factor, otherwise non-present levels are not removed.
}
}
### Set ylim:
ylms <- c(min(numVar, na.rm = TRUE), max(numVar, na.rm = TRUE))
ylms <- ylms + c(-add.ylim, add.ylim) # Add the add.ylim
#*********************************************************************************
# RUN GROUP COMPARISONS ####
#*********************************************************************************
### Only run if catVar has entries of multiple levels:
drawGrCmp <- FALSE # Indicator if group comparisons can be plotted
if(nlevels(catVar) > 1 & sigGroup){
### List group comparisons in table:
grInd <- data.frame(t(utils::combn(1:nlevels(catVar),2)))
### Apply wilcoxon tests to compare corresponding groups (collect pvalues):
if(is.null(pairedVar)){ # Unpaired test
wilxP <- apply(grInd, 1, function(x){ wilcox.test(x = numVar[as.numeric(catVar)==x[1]],
y = numVar[as.numeric(catVar)==x[2]])$p.value })
}else{ # Paired test
### Check whether length of pairedVar is correct:
stopifnot(length(pairedVar)==length(numVar), is.factor(pairedVar))
### Function to perform correct pairwise test (have to be careful that same cases are paired)
pairSel <- function(x){
dd <- data.frame(numVar, catVar, pairedVar)
xx <- dd[as.numeric(dd$catVar)==x[1],]
yy <- dd[as.numeric(dd$catVar)==x[2],]
### Check which cases are present in both groups:
lvs <- 1:nlevels(pairedVar)
lvsOk <- which(lvs %in% as.numeric(xx$pairedVar) & lvs %in% as.numeric(yy$pairedVar))
xx.1 <- xx[as.numeric(xx$pairedVar) %in% lvsOk,]
yy.1 <- yy[as.numeric(yy$pairedVar) %in% lvsOk,]
### Apply wilcoxon test:
wilcox.test(x = xx.1[order(xx.1$pairedVar),'numVar'],
y = yy.1[order(yy.1$pairedVar),'numVar'], paired = TRUE)$p.value
}
wilxP <- apply(grInd, 1, pairSel)
}
### Combine to table (and apply Bonferonni correction):
grCmp.0 <- cbind(grInd, wilxP)
if(multCmp){grCmp.0$wilxP <- wilxP*nrow(grInd)} # Apply Bonferroni correction
### Add stars to table:
grCmp.0$strs <- symnum(grCmp.0$wilxP, corr = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, ifelse(max(grCmp.0$wilxP) > 1, max(grCmp.0$wilxP)+1, 1)),
symbols = c("***", "**", "*", ""), legend = FALSE)
### Filter out only significant tests:
grCmp <- grCmp.0[grCmp.0$wilxP < 0.05,]
### Only proceed if there are significant tests:
if(nrow(grCmp) > 0){
### Add y-values for plot (should not be too far apart):
grCmp$yVal <- max(ylms) + (abs(min(ylms)-max(ylms))*0.05 * 1:nrow(grCmp))
### Adapt ylms:
ylms <- c(ylms[1], max(grCmp$yVal))
### Set indicator to plot to TRUE:
drawGrCmp <- TRUE
}else{warning('There were no significant group differences.')}
}
#*********************************************************************************
# RUN MU COMPARISONS ####
#*********************************************************************************
### Test each group against a mean value Mu using a wilcoxon test:
drawMu <- FALSE # Indicator to draw results in plot
if(!is.null(sigMu)){
### Iterate through groups:
p.mu <- NA
for(i in 1:nlevels(catVar)){
dmu.i <- numVar[as.numeric(catVar)==i]
p.mu[i] <- wilcox.test(dmu.i, mu = sigMu)$p.value
}
# Apply Bonferroni correction:
if(multCmp){p.mu <- p.mu*nlevels(catVar)}
### Create table:
d.mu <- data.frame("categ"=1:nlevels(catVar), "pmu"=p.mu)
### Add stars:
d.mu$strs <- symnum(d.mu$pmu, corr = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, ifelse(max(d.mu$pmu) > 1, max(d.mu$pmu)+1, 1)),
symbols = c("***", "**", "*", ""), legend = FALSE)
### Filter out only significant tests:
d.mu <- d.mu[d.mu$pmu < 0.05,]
### Only proceed if there are significant tests:
if(nrow(d.mu) > 0){
### Adapt ylms:
if(sigMu < ylms[1]){
ylms[1] <- sigMu
}else if(sigMu > ylms[2]){
ylms[2] <- sigMu
}
### Set indicator to plot to TRUE:
drawMu <- TRUE
}else{warning('There were no significant differences from tested location Mu.')}
}
#*********************************************************************************
# PLOT POINTS ####
#*********************************************************************************
### Get title name:
main.nm <- deparse(substitute(main))
### Plugin custom y-values:
if(!is.null(ylim.cust)){
ylms <- ylim.cust
}
### Plugin custom axes labels:
if(is.null(xlab)){
xlab <- ifelse(catVar.nm=='NULL', '', catVar.nm)
}
if(is.null(ylab)){
ylab <- numVar.nm
}
### Set colour:
if(is.null(point.ColPalette)){
pointPal <- hcl.colors(n = nlevels(catVar), palette = 'dynamic')
}else{
pointPal <- rep(point.ColPalette, nlevels(catVar))
}
### Start plot (empty):
if(!add){
plot(x=1,y=1,
type='n',
xlim = c(0,nlevels(catVar)+1),
ylim = ylms,
xaxt = 'n',
ylab = ylab,
xlab = xlab,
main = ifelse(main.nm=='NULL', paste0(numVar.nm, ' Plot'), main))
}
### Add points:
### Set background of points:
bgcol0 <- if(is.null(fxdCol)){pointPal[as.numeric(catVar)]}else{fxdCol}
bgcol <- mktransp(color=bgcol0, alpha=bgalph)
if(plot.points){
points(x = jitter(as.numeric(catVar), factor = jitFactor),
y = numVar,
col = if(is.null(fxdCol)){pointPal[as.numeric(catVar)]}else{fxdCol},
bg = bgcol,
...)
}
### Add x-axis labels:
if(!catVar.nm=='NULL' & !add){
axis(1, at = 1:nlevels(catVar), labels = levels(catVar))
}
### Add legend:
if(nlevels(catVar) > 1 & add.lgnd){ # Only needed with multiple levels
legend('bottomright', legend = levels(catVar),
pch=1,
col=if(is.null(fxdCol)){1:nlevels(catVar)}else{fxdCol})
}
#*********************************************************************************
# PLOT LINES IN PAIRED CASE ####
#*********************************************************************************
### Only run if a "pairedVar" factor was provided:
if(!is.null(pairedVar) & nlevels(catVar) > 1){
### Make sure there are no levels with no entries:
pairedVar <- factor(pairedVar)
### Check if every case has maximally one entry per catVar level:
if(max(table(catVar, pairedVar)) > 1){ warning('There are cases with multiple entries for a catVar level.') }
### Define colour of lines:
if(is.null(pairCol)){
pCol <- as.numeric(pairedVar) # Each case gets its own colour
} else if(length(pairCol)==length(pairedVar)){
if(is.factor(pairCol)){
pCol <- as.numeric(pairCol) # Colour of lines according to pairCol factor
}
if(is.numeric(pairCol)){
br_ramp <- colorRampPalette(c('red','blue'))
pCol <- br_ramp(10)[as.numeric(cut(pairCol, breaks=10))] # Colour ranging from blue to red according to value of pairCol numeric
}
if(is.character(pairCol)){
### In case of character use directly:
pCol <- pairCol
}
}else{pCol <- pairCol} # When pairCol is one fixed colour
### Draw lines per case in for-loop:
dd.0 <- data.frame(numVar, catVar, pairedVar, pCol)
for(i in 1:nlevels(pairedVar)){
dd <- dd.0[as.numeric(dd.0$pairedVar)==i,] # Select entries of case i
dd <- dd[order(dd$catVar), ] # Make sure order of catVar is correct
### Check whether pCol values are all equal for case:
if(length(unique(dd$pCol)) != 1){warning('There are cases with different values of the pairCol factor.')}
### Plot line:
lines(x = dd$catVar, y = dd$numVar, col=dd$pCol[1], lwd=pair.lwd)
}
}
#*********************************************************************************
# ADD VIOLIN LINES ####
#*********************************************************************************
### Add the violin lines:
if(violin){
L <- list()
for(i in 1:nlevels(catVar)){
d <- density(numVar[as.numeric(catVar)==i], na.rm = TRUE, bw = bw)
### Get min and max value of numVar:
minNum.i <- min(numVar[as.numeric(catVar)==i], na.rm = TRUE)
maxNum.i <- max(numVar[as.numeric(catVar)==i], na.rm = TRUE)
### Remove density values falling outside the min-max range:
denx <- d$x[d$x > minNum.i & d$x < maxNum.i]
deny <- d$y[d$x > minNum.i & d$x < maxNum.i]
### Turn density values at each end to zero to cut off the density curve:
deny[1] <- 0
deny[length(deny)] <- 0
L[[i]] <- data.frame(xd=denx, yd=deny)
}
names(L) <- levels(catVar)
### We have to scale the densities, need the maximum value for that:
maxD <- max(do.call(c, lapply(L, function(x){x$yd})))
cexD <- densScl/maxD
### Now plot the densities:
for(i in 1:nlevels(catVar)){
lines(L[[i]]$yd*cexD + i, L[[i]]$xd, col= if(is.null(fxdCol)){pointPal[i]}else{fxdCol}, lwd=3)
lines((-L[[i]]$yd)*cexD + i, L[[i]]$xd, col=if(is.null(fxdCol)){pointPal[i]}else{fxdCol}, lwd=3)
}
}
#*********************************************************************************
# ADD MEAN LINES AND CONNECTIONS ####
#*********************************************************************************
### Add the mean-value lines:
if(showMean){
for(i in 1:nlevels(catVar)){
mVal <- mean(numVar[as.numeric(catVar)==i], na.rm = TRUE)
segments(x0 = i-0.3, y0 = mVal, x1 = i+0.3, y1 = mVal, col = if(is.null(fxdCol)){pointPal[i]}else{fxdCol}, lwd = 3)
}
}
### Add mean connections:
if(!is.null(lnk.means)){
mVal <- tapply(numVar, INDEX = catVar, FUN = mean, na.rm=TRUE)
lines(x = 1:nlevels(catVar), y = mVal, col=lnk.means, lwd=lnk.means.lwd)
}
#*********************************************************************************
# ADD LINES OF GROUP COMPARISONS ####
#*********************************************************************************
### Only run if there are significant results:
if(drawGrCmp){
### Draw lines:
for(i in 1:nrow(grCmp)){
xx <- as.numeric(grCmp[i,1:2])
yy <- rep(grCmp[i,'yVal'], 2)
lines(xx, yy)
### Find a good ticklength:
tickL <- (abs(min(ylms)-max(ylms))*0.01)
lines(c(xx[1], xx[1]), c(yy[1], yy[1] - tickL))
lines(c(xx[2], xx[2]), c(yy[2], yy[2] - tickL))
### Add stars:
text(x = mean(xx), y = yy[1]+tickL, labels = grCmp[i,'strs'])
}
}
#*********************************************************************************
# ADD LINES OF MU COMPARISONS ####
#*********************************************************************************
### Only run if there are significant results:
if(drawMu){
### Draw lines:
for(i in 1:nrow(d.mu)){
xx <- rep(d.mu$categ[i]-0.4, 2)
yy <- c(mean(numVar[as.numeric(catVar)==d.mu$categ[i]], na.rm = TRUE), sigMu)
lines(xx,yy)
### Find good ticklength:
tickL <- nlevels(catVar)/100
lines(c(xx[1], xx[1] + tickL), c(yy[1], yy[1]))
lines(c(xx[2], xx[2] + tickL), c(yy[2], yy[2]))
### Add stars:
text(x = xx[1]-2*tickL, y = mean(yy), labels = d.mu[i,'strs'])
### Draw Mu value as line:
abline(h = sigMu, lty=2)
}
}
}
#*********************************************************************************
# NICE PAIRS PLOT ####
#*********************************************************************************
nicePairsPlot <- function(x, catVar = NULL, breaks = "Sturges", density = FALSE, jitter = FALSE, jitFactor = 1, loess = FALSE, swtchPan = FALSE, exclude = c("none", "numeric", "factor"), keepOrder = FALSE, facsAtBegin = FALSE, cex.diag = 2, cex.offdiag = 1, cex.mean = 2, cex.lvls = 1){
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# PREPARE DATA ####
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
### Make sure x is a data frame:
x <- data.frame(x)
### Check some things about exclude option:
if(any(exclude %in% colnames(x))){ # In case of manually selecting vars
if(!all(exclude %in% colnames(x))){warning('Not all variables to exclude are present in the data')}
if(any(exclude %in% as.character(formals(nicePairsPlot)$exclude)[-1])) warning(paste0('The exclude argument is appearing in the colnames but also includes one of the general exclusion options (', paste(as.character(formals(nicePairsPlot)$exclude)[-1], collapse = ', '), ')'))
}else{
exclude <- match.arg(exclude)
cls_toex <- switch (exclude,
none = '',
numeric = c('numeric', 'integer'),
factor = c('character', 'factor')
)
### Get var names to exclude:
ex_log <- sapply(x, inherits, what=cls_toex)
exclude <- colnames(x)[ex_log]
}
### Check some things regarding catVar:
if(!is.null(catVar)){
stopifnot(nrow(x)==length(catVar))
stopifnot(inherits(catVar, 'factor'))
}
### Remove variables to exclude:
x <- x[, !(colnames(x)%in%exclude), drop=FALSE]
### Turn every character to factor:
lfac <- lapply(x, function(a){
if(inherits(a, 'character')) {rval <- factor(a)}else{rval <- a}
})
x <- do.call(data.frame, lfac)
### Reorder columns to have factors together:
if(!keepOrder){
fac_id <- sapply(x, inherits, what='factor') # Indices
if(!facsAtBegin){
x <- cbind(x[, !fac_id, drop=FALSE], x[, fac_id, drop=FALSE]) # At end
}else{
x <- cbind(x[, fac_id, drop=FALSE], x[, !fac_id, drop=FALSE]) # At beginning
}
}
### Store colnames:
nms <- colnames(x)
### Store whether factor:
fac_id <- sapply(x, inherits, what='factor')
### Make sure data has multiple variables:
if(ncol(x) < 2){stop('There must be at least two variables for plotting.')}
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# DIFFERENT PLOT FUNCTIONS ####
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
### Barplot factors:
barpl_fac <- function(vx, vy){
### Only one variable needed:
v <- vx
### xylims:
xlims <- c(0, nlevels(v)) + 0.5
ylims <- c(0, 1.4*max(table(v)))
### Prepare Plot:
plot(x = NA, xlab='', ylab='', yaxt='n', xaxt='n', xlim=xlims, ylim=ylims, yaxs='i', xaxs='i')
### Background:
rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "#b6f0fc")
### Add rectangles:
bw <- 0.43 # bar width
recpos <- 1:nlevels(v)
rect(recpos - bw, 0, recpos + bw, table(v), col = 'cyan')
### Axes:
if(edge_rw!=0) axis(edge_cl)
### Level names:
if(nlevels(v) < 10){
text(x = recpos, y = ylims[1]+0.05*ylims[2], labels = levels(v), srt=90, pos=4, offset=0, cex=cex.lvls)
}
### Title:
text(x = mean(par()$usr[1:2]), y = ylims[2]*0.85, labels = nms[rw], cex=cex.diag)
}
#************************************
### Histogramm:
histgr <- function(vx, vy){
### Only one variable needed:
v <- vx
### Histogramm data:
h <- hist(v, breaks = breaks, plot = FALSE)
hbrks <- h$breaks
nB <- length(hbrks)
y <- h$counts
### xylims:
xlims <- range(hbrks)
ylims <- c(0, 1.4*max(y))
### Prepare plot:
plot(x = NA, xlab='', ylab='', yaxt='n', xaxt='n', xlim=xlims, ylim=ylims, yaxs='i', xaxs='i')
### Add axes:
if(edge_cl!=0) axis(edge_cl)
if(edge_rw==3 & !swtchPan) axis(edge_rw) # Draw axis above histogramm in top left corner
if(edge_rw==1 & swtchPan) axis(edge_rw) # Draw axis below histogramm in bottom right corner
### Add rectangles:
rect(hbrks[-nB], 0, hbrks[-1], y, col = 'cyan')
### Title:
text(x = mean(par()$usr[1:2]), y = ylims[2]*0.85, labels = nms[rw], cex=cex.diag)
### Add density:
if(density){
tryd <- try(d <- density(vx, na.rm = TRUE), silent = TRUE)
if (class(tryd) != 'try-error') {
d$y <- d$y/max(d$y)*max(y)
lines(d, col='darkblue')
}
}
}
#************************************
### Scatterplot:
scattpl <- function(vx, vy){
### Indicator if mean vals should be plotted:
pltm <- FALSE
### xylims and jitter:
jitx <- jity <- 0 # Default
jitf_fct <- 0.5 # Have jitter a bit smaller for factors
### Var x:
if(fac_id[cl]){
xlims <- c(0, nlevels(vx)) + 0.5
jitx <- jitter(as.numeric(vx), factor=jitFactor*jitf_fct) - as.numeric(vx) # Create jitter
}else{
xlims <- range(vx, na.rm = TRUE) + (diff(range(vx, na.rm = TRUE))*0.05)*c(-1,1)
jitx <- if (jitter){ jitter(as.numeric(vx), factor=jitFactor) - as.numeric(vx)}else{0}
}
### Var y:
if(fac_id[rw]){
ylims <- c(0, nlevels(vy)) + 0.5
jity <- jitter(as.numeric(vy), factor=jitFactor*jitf_fct) - as.numeric(vy) # Create jitter
}else{
ylims <- range(vy, na.rm = TRUE) + (diff(range(vy, na.rm = TRUE))*0.05)*c(-1,1)
jity <- if (jitter){ jitter(as.numeric(vy), factor=jitFactor) - as.numeric(vy)}else{0}
}
### Prepare plot:
plot(x = NA, xlab='', ylab='', yaxt='n', xaxt='n', xlim=xlims, ylim=ylims, yaxs='i', xaxs='i')
### Background:
if(all(fac_id[c(rw, cl)])){
rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "#b6f0fc")
}else if(any(fac_id[c(rw, cl)])){
rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "#e8fbff")
### Indicator if mean vals should be plotted:
pltm <- TRUE
}
### Add points:
cls <- if(is.null(catVar)){1}else{catVar} # Set colour
points(x = as.numeric(vx) + jitx, y = as.numeric(vy) + jity, col=cls)
### Add mean values (only for plots with numeric and factor):
if(pltm){
### Check which one is factor:
if(fac_id[rw]){
points(x = tapply(vx, vy, mean, na.rm=TRUE), y = 1:nlevels(vy), col='cyan', cex=cex.mean, pch=19)
}else{
points(x = 1:nlevels(vx), y = tapply(vy, vx, mean, na.rm=TRUE), col='cyan', cex=cex.mean, pch=19)
}
}
### Add axes:
if(edge_rw!=0){
if(fac_id[cl]){
axis(edge_rw, at = 1:nlevels(vx), labels = levels(vx))
}else{
axis(edge_rw)
}
}
if(edge_cl!=0){
if(fac_id[rw]){
axis(edge_cl, at = 1:nlevels(vy), labels = levels(vy))
}else{
axis(edge_cl)
}
}
### Loess line (only for numerics):
if(loess & !fac_id[rw] & !fac_id[cl]){
tryd <- try(lml <- suppressWarnings(loess(vy ~ vx, degree = 1, family = "symmetric")), silent=TRUE)
if(class(tryd)!='try-error'){ # In case of no error
tempx <- data.frame(vx = seq(min(vx, na.rm = TRUE),
max(vx, na.rm = TRUE), length.out = 50))
pred <- predict(lml, newdata = tempx)
lines(x=tempx$vx, y=pred, col='cyan', lty=2, lwd=2)
}
}
}
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# DIFFERENT STATISTICAL TEST FUNCTIONS ####
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
### Simple correlation:
cortst <- function(vx, vy){
### Add try statement because of potential errors:
tryd <- try({
cort <- cor.test(vx, vy)
p <- cort$p.value
psymbs <- c("***", "**", "*", "")
star <- symnum(p, corr = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 1),
symbols = psymbs, legend = FALSE)
txt <- paste0('r= ', round(cort$estimate, digits=2), star)
### Size of numbers:
poscex <- seq(from=1, by=0.8, length.out=4) # Possible sizes
poscex <- poscex[length(poscex):1] # Reverse
cex <- poscex[psymbs %in% star] * cex.offdiag
}, silent = TRUE)
### In case of error:
if(class(tryd)=='try-error'){
txt <- 'Err'
cex <- 1
}
### Write statistic and add stars:
plot(x = NA, xlab='', ylab='', yaxt='n', xaxt='n', xlim=0:1, ylim=0:1, yaxs='i', xaxs='i')
text(0.5, 0.5, txt, cex = cex)
}
#************************************
### ANOVA:
aovtst <- function(vx, vy){
### Which var is factor:
if(fac_id[rw]){ yaov <- vx; xaov <- vy }else{ yaov <- vy; xaov <- vx }
### Add try statement because of potential errors:
tryd <- try({
res <- anova(lm(yaov ~ xaov))
fval <- round(res$`F value`[1], 1)
p <- res$`Pr(>F)`[1]
psymbs <- c("***", "**", "*", "")
star <- symnum(p, corr = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 1),
symbols = psymbs, legend = FALSE)
txt <- paste0('F= ', fval, star)
### Size of numbers:
poscex <- seq(from=1, by=0.8, length.out=4) # Possible sizes
poscex <- poscex[length(poscex):1] # Reverse
cex <- poscex[psymbs %in% star] * cex.offdiag
}, silent = TRUE)
### In case of error:
if(class(tryd)=='try-error'){
txt <- 'Err'
cex <- 1
}
### Setup plot:
plot(x = NA, xlab='', ylab='', yaxt='n', xaxt='n', xlim=0:1, ylim=0:1, yaxs='i', xaxs='i')
### Background:
rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "#e8fbff")
### Write statistic and add stars:
text(0.5, 0.5, txt, cex = cex)
}
#************************************
### Chisq test:
chitst <- function(vx, vy){
### Add try statement because of potential errors:
tryd <- try({
res <- chisq.test(table(vx, vy))
xval <- round(res$statistic, 1)
p <- res$p.value
psymbs <- c("***", "**", "*", "")
star <- symnum(p, corr = FALSE, cutpoints = c(0, 0.001, 0.01, 0.05, 1),
symbols = psymbs, legend = FALSE)
txt <- paste0('Chi= ', xval, star)
### Size of numbers:
poscex <- seq(from=1, by=0.8, length.out=4) # Possible sizes
poscex <- poscex[length(poscex):1] # Reverse
cex <- poscex[psymbs %in% star] * cex.offdiag
}, silent = TRUE)
### In case of error:
if(class(tryd)=='try-error'){
txt <- 'Err'
cex <- 1
}
### Prepare plot:
plot(x = NA, xlab='', ylab='', yaxt='n', xaxt='n', xlim=0:1, ylim=0:1, yaxs='i', xaxs='i')
### Background:
rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "#b6f0fc")
### Write statistic and add stars:
text(0.5, 0.5, txt, cex = cex)
}
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# CREATE PLOT INDICATION MATRIX ####
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
### Visual plots:
### Keys:
pltnms <- c('histgr', 'barpl_fac', 'scattpl')
### Matrix:
plt_m <- matrix(3, nrow=ncol(x), ncol = ncol(x))
### Diagonal:
diag(plt_m) <- fac_id+1
### Turn to text matrix:
plt_m_chr <- matrix(pltnms[as.vector(plt_m)], nrow=ncol(x))
### Test matrix:
### Keys:
tstnms <- c('cortst', 'aovtst', 'chitst')
### Matrix:
tst_m <- outer(fac_id, fac_id, "+")+1
### Turn to text matrix:
tst_m_chr <- matrix(tstnms[as.vector(tst_m)], nrow=ncol(x))
### Combine the two matrices:
if(!swtchPan){
plt_m_chr[lower.tri(plt_m_chr)] <- tst_m_chr[lower.tri(tst_m_chr)]
}else{
plt_m_chr[upper.tri(plt_m_chr)] <- tst_m_chr[upper.tri(tst_m_chr)]
}
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# CREATE PLOT ####
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
### Prepare plot:
olpar <- parSave()
on.exit(parReset(olpar)) # Make sure par is reset
par(mfrow=c(ncol(x), ncol(x)))
### Margins:
mars <- rep(0.5, 4)
par(mar=mars)
par(oma = rep(3, 4))
### Loop through plot grid:
for(rw in 1:ncol(x)){
### Check if on edge:
edge_rw <- if(rw==1){3} else if(rw==ncol(x)){1} else{0}
for(cl in 1:ncol(x)){
### Check if on edge:
edge_cl <- if(cl==1){2} else if(cl==ncol(x)){4} else{0}
### Apply correct plot:
do.call(plt_m_chr[rw, cl], list(vx = x[,cl], vy = x[,rw]))
}
}
}
#*********************************************************************************
# NICE 3D PLOT ####
#*********************************************************************************
nice3DPlot <- function(X = NULL, whatToPlot = c('P','D','PD'), plotFit = c('no','lin','int','int2','int3'), catVar = factor(1), covMat = NULL, means = NULL, nSim = 500, pointCol = 1, colRamp = NULL, pointSize = NULL, spheres = FALSE, pointTrans = 0.8, Dtransp_fac = 0.06, colres = 50, gridRes = 30, h = 0.3, DpointSize = 30, axesNames = NULL, axesTicks = FALSE, gridCol = 'grey', axesLeng = NULL, zoom = 1, add = FALSE, htmlFilename=NULL, ...){
#*********************************************************************************
# TEST CONDITIONS ####
#*********************************************************************************
stopifnot(whatToPlot[1] %in% c('P','D', 'PD'))
stopifnot(plotFit[1] %in% c('no','lin','int','int2','int3'))
stopifnot(is.factor(catVar))
if(!all(unlist(lapply(as.data.frame(X[,1:3]), is.numeric)))){
stop("The first three variables of X (used for plotting) have to be numeric")
}
#*********************************************************************************
# EMPTY PLOT IF NOTHING PROVIDED ####
#*********************************************************************************
if(is.null(X) & is.null(means) & is.null(covMat)){
if(is.null(axesNames)) {axesNames <- c('x','y','z')}
if(is.null(axesLeng)) {axesLeng <- rep(1,3)}
.coorSystem3D(axesNames, gridCol, axesLeng, axesTicks, zoom)
warning("No data provided, so only empty coordinate system created.")
return()
}
#*********************************************************************************
# PREPARE DATA ####
#*********************************************************************************
### If X provided:
if (!is.null(X)){
### Select first three columns:
if(ncol(X)>3){warning('More than three variables in the data. Will select the first three columns for plotting.')}
dat <- as.data.frame(X[,1:3])
### In case colour ramp up is asked for:
if(!is.null(colRamp)){
### Some checks:
if(any(is.na(colRamp))){warning('There are missings in the colRamp variable')}
if(length(colRamp) != nrow(X)){warning('colRamp does not have the same length as the data to be plotted.')}
### Prepare palette:
ncl_pl <- 100
col_pl <- colorRampPalette(c('navy', 'hotpink'))(ncl_pl)
### Cut likel. values into ranges:
col_indx <- as.numeric(cut(colRamp, breaks=ncl_pl))
### Point colour vector:
pointCol <- col_pl[col_indx]
}
### Combine with other variables
dat <- cbind(dat, "catV"=catVar, "pointC"=pointCol)
### Adapt pointCol if catVar supplied:
if(nlevels(dat$catV) > 1){dat$pointC <- as.numeric(dat$catV)}
### Remove rows with missing values:
noNaSel <- apply(dat, 1, function(x){all(!is.na(x))})
dat <- dat[noNaSel,]
if(sum(!noNaSel) > 0){warning("There are missing values in the data. Will remove the rows containing missing values.")}
### Generate x, y, z:
colnames(dat)[1:3] <- c('x','y','z')
### Generate axesnames if not provided:
if(is.null(axesNames)) {axesNames <- colnames(X)[1:3]}
#*********************
### Simulate data:
}else if(!is.null(covMat) & !is.null(means)){
### If no X provided:
dat <- as.data.frame(mvtnorm::rmvnorm(n = nSim, mean = means, sigma = covMat)) # Alternative: method = 'svd'
### Combine with other variables
dat <- cbind(dat, "catV"=catVar, "pointC"=pointCol)
### Generate x, y, z:
colnames(dat)[1:3] <- c('x','y','z')
### Generate axesnames if not provided:
if(is.null(axesNames)) {axesNames <- c('x','y','z')}
} else {
stop('No data (X) is provided. For a simulation either the vector of means or the covariance matrix is missing. To run a Simulation both are needed.')
}
#*********************************************************************************
# DENSITY CALCULATION ####
#*********************************************************************************
### Apply the kernel density estimator:
dens <- misc3d::kde3d(dat$x,dat$y,dat$z, h = h, n = gridRes)
### Create the matrix needed for the rgl.points (this is simply a rearrangement to get everything in one table)
indices <- expand.grid(1:gridRes, 1:gridRes, 1:gridRes)
den_coor <- expand.grid(dens$x, dens$y, dens$z)
convertFunc <- function(x, dd){dd[x[1], x[2], x[3]]}
den_coor$dens <- apply(indices, 1, convertFunc, dd=dens$d)
### Colour palette:
rbPal <- grDevices::colorRampPalette(c('blue', 'red'))
### Define progression of the colour pallette:
den_coor$col <- rbPal(colres)[as.numeric(cut(den_coor$dens,breaks = colres))]
#*********************************************************************************
# PLOT POINTS/DENSITY ####
#*********************************************************************************
### Set axis-length if not provided:
if(is.null(axesLeng)){axesLeng <- c(max(dat$x), max(dat$y), max(dat$z))*1.5}else{axesLeng <- rep(axesLeng, length.out=3)}
### Plot empty coordinate system (only if add is FALSE):
if(!add){
.coorSystem3D(axesNames, gridCol, axesLeng, axesTicks, zoom)
}
### Start plotting:
if(grepl('P', whatToPlot[1])){
if(spheres){ # Should spheres be plotted or points
if(is.null(pointSize)){pointSize <- 0.05}
rgl::rgl.spheres(dat[,1:3], color=dat$pointC, radius=pointSize, alpha=pointTrans)
}else{
if(is.null(pointSize)){pointSize <- 5}
rgl::points3d(dat[,1:3], color=dat$pointC, size=pointSize, alpha=pointTrans, point_antialias = TRUE) # point_antialias makes points round in WebGL (or use rgl.spheres alternatively)
}
}
if (grepl('D', whatToPlot[1])){ # Plot density
rgl::points3d(den_coor[,1:3], color=den_coor$col, size=DpointSize, point_antialias = TRUE,
alpha=den_coor$dens/max(den_coor$dens)*Dtransp_fac) # Here also the progression of the transparency is defined
}
#*********************************************************************************
# PLOT LINEAR FIT ####
#*********************************************************************************
### Check what to plot:
if(plotFit[1]!='no'){
### Prepare grid for plotting:
x.pred <- seq(min(dat$x), max(dat$x), length.out = 100)
z.pred <- seq(min(dat$z), max(dat$z), length.out = 100)
xz.grid <- expand.grid("x"=x.pred, "z"=z.pred)
### Iterate through the catVar levels:
for(i in 1:nlevels(dat$catV)){
#****************************************
### In case there is only 1 catVar level:
if(nlevels(dat$catV)==1){
if(plotFit[1]=='lin'){
### Fit model for coefficients:
lm.y <- lm(y ~ x + z, dat)
### Predict y values:
y.pred <- matrix(predict(lm.y, newdata = data.frame("x"=xz.grid$x, "z"=xz.grid$z)), ncol = length(x.pred))
}else if(plotFit[1]=='int'){
### Fit model for coefficients:
lm.y <- lm(y ~ x * z, dat)
### Predict y values:
y.pred <- matrix(predict(lm.y, newdata = data.frame("x"=xz.grid$x, "z"=xz.grid$z)), ncol = length(x.pred))
}
#****************************************
}else if(plotFit[1]=='lin'){
### Fit model for coefficients:
lm.y <- lm(y ~ x + z + catV, dat)
### Predict y values:
y.pred <- matrix(predict(lm.y, newdata = data.frame("x"=xz.grid$x, "z"=xz.grid$z, "catV"=levels(dat$catV)[i])), ncol = length(x.pred))
}else if(plotFit[1]=='int'){
### Fit model for coefficients:
lm.y <- lm(y ~ x * z + catV, dat)
### Predict y values:
y.pred <- matrix(predict(lm.y, newdata = data.frame("x"=xz.grid$x, "z"=xz.grid$z, "catV"=levels(dat$catV)[i])), ncol = length(x.pred))
}else if(plotFit[1]=='int2'){
### Fit model for coefficients:
lm.y <- lm(y ~ x + z + catV + catV:x + catV:z, dat)
### Predict y values:
y.pred <- matrix(predict(lm.y, newdata = data.frame("x"=xz.grid$x, "z"=xz.grid$z, "catV"=levels(dat$catV)[i])), ncol = length(x.pred))
}else if(plotFit[1]=='int3'){
### Fit model for coefficients:
lm.y <- lm(y ~ x * z * catV, dat)
### Predict y values:
y.pred <- matrix(predict(lm.y, newdata = data.frame("x"=xz.grid$x, "z"=xz.grid$z, "catV"=levels(dat$catV)[i])), ncol = length(x.pred))
}
### Set surface color:
srfCl <- ifelse(test = nlevels(dat$catV) > 1, yes = i, no = "steelblue")
### Plot surface of fit:
rgl::surface3d(x = x.pred, z = z.pred, y = y.pred, color = srfCl,
alpha = 0.5, lit = FALSE)
}
}
#*********************************************************************************
# SAVE HTML FILE ####
#*********************************************************************************
### Check if filename exists:
if(!is.null(htmlFilename)){
### Create widget:
widget <- rgl::rglwidget(...)
### Save as html:
htmlwidgets::saveWidget(widget, htmlFilename)
}
}
### Function to plot empty 3d coordinate system (only intended to be used inside nice3DPlot):
.coorSystem3D <- function(axesNames, gridCol, axesLeng, axesTicks, zoom) {
rgl::clear3d()
rgl::bg3d(color = 'white')
rgl::segments3d(c(0, axesLeng[1]), c(0, 0), c(0, 0), color = "black")
rgl::segments3d(c(0, 0), c(0,axesLeng[2]), c(0, 0), color = "black")
rgl::segments3d(c(0, 0), c(0, 0), c(0,axesLeng[3]), color = "black")
axes <- rbind(c(axesLeng[1], 0, 0), c(0, axesLeng[2], 0),
c(0, 0, axesLeng[3]))
rgl::text3d(axes, text = axesNames[1:3], color='black', adj = c(0.5, -0.8))
if(axesTicks){
rgl::axis3d(edge = 'x', pos = c(NA,0,0), color='black')
rgl::axis3d(edge = 'y', pos = c(0,NA,0), color='black')
rgl::axis3d(edge = 'z', pos = c(0,0,NA), color='black')
}
rgl::view3d(theta = -120, phi = 0, zoom=zoom)
### grid on floor:
x <- seq(0,axesLeng[1], length.out = 10)
z <- seq(0,axesLeng[3],length.out = 10)
y <- matrix(rep(0, 100), ncol = 10)
rgl::surface3d(x = x, y = y, z = z,
color = gridCol,
alpha = 0.5, lit = FALSE,front = "lines", back = "lines")
}
#*********************************************************************************
# MIXED MODEL DGP ####
#*********************************************************************************
mmdgp <- function(n=200, nC=5, sd_S=3, sd_C=5, sd_e=2, b0=50, tb=c(0,4,10,4,0),
genb=10, ageb=1, xwithnb=2, sd_xwithn=1){
### Check some condition:
if(n%%nC!=0){stop('There has to be an equal number of people in each class. Currently, the number of people is not divisible by the number of classes.')}
### Generate subjects:
id <- factor(paste0('S', 1:n), levels = paste0('S', 1:n))
### Generate random intercept of subjects:
id.ri <- rnorm(n, sd=sd_S)
### Generate age variable:
age <- ceiling(runif(n, min = 20, max = 70))
### Generate class factor:
class <- factor(paste0('Class', 1:nC), levels = paste0('Class', 1:nC))
### Generate class random intercept:
clss.ri <- rnorm(nC, sd=sd_C)
### Generate random slope values:
xwithn.rs <- rnorm(n, mean = 0, sd = sd_xwithn)
### Generate gender variable:
gen <- rep(0:1, length.out=n)
### Combine to data frame:
ds <- data.frame(id, class, gen, age, id.ri, clss.ri, xwithn.rs)
ds <- ds[order(ds$class, ds$id),] # Order after classes and individuals
ds$gen <- sample(ds$gen) # mix up gender
### Generate target variable for each day:
### First replicate each row in data frame for each day:
dat.0 <- replicate(length(tb), ds, simplify = FALSE)
dat.0 <- Reduce(f = function(a,b)rbind(a,b), x = dat.0)
dat.0 <- dat.0[order(dat.0$class, dat.0$id),] # Reorder
### Add day variable:
dat.0$day <- 1:length(tb)
### Add random error:
dat.0$err <- rnorm(nrow(dat.0), sd=sd_e)
### Add within variable:
### Split along id:
ds2 <- split(dat.0, f = dat.0$id)
### Add predictor (different variance for each id):
ds2.1 <- lapply(ds2, function(x){
x$xwithn <- rnorm(length(tb), mean = 0, sd = runif(1, min = 1, max = 5))
return(x)
})
### Merge:
dat.0 <- do.call(rbind, ds2.1)
### Create xwithn effect:
dat.0$xwithnEff <- (dat.0$xwithn.rs + xwithnb)*dat.0$xwithn
### Generate target variable for each day (bit messy...):
dd <- dat.0[, -which(colnames(dat.0) %in% c('id', 'class', 'day', "xwithn.rs", "xwithn"))] # Remove some variables
dd$dayEff <- tb
dd$b0 <- b0
dat.0$y <- apply(dd, 1, function(x){
t(as.matrix(x))%*%c(genb, ageb, 1, 1, 1, 1, 1, 1)
})
### Remove the columns not needed:
dat <- dat.0
dat$id.ri <- NULL
dat$clss.ri <- NULL
dat$err <- NULL
dat$xwithn.rs <- NULL
dat$xwithnEff <- NULL
### Gender to factor:
dat$gen <- factor(dat$gen, levels = c(0,1), labels = c('M','F'))
### Day to factor:
dat$day <- as.factor(dat$day)
### Return object:
return(dat)
}
#*********************************************************************************
# WIDE TO LONG DATA FORMAT ####
#*********************************************************************************
wideToLong <- function(x, nRep=NULL, ind='T*_', indCust=NULL,
repColnm='repIdentifier', ind_atEnd = FALSE, ignore_unbal = FALSE,
verbose = FALSE, rmv_linkchar = NULL, shw.mssg = TRUE){
### Check some conditions:
if(is.null(nRep) & is.null(indCust)){
stop('Either nRep or indCust must be supplied')
}
### Get names of rep-identifier:
if(is.null(indCust)){
rnms <- sapply(1:nRep, function(y)gsub(pattern = '\\*', replacement = y, ind))
}else{
rnms <- indCust
}
### Function to extract n characters at beginning or end of string:
extr_char <- function(wrd, n, atEnd){
if(atEnd){
char_pos <- substring(wrd, first = nchar(wrd)-(n-1), last = nchar(wrd))
char_neg <- substring(wrd, first = 1, last = nchar(wrd)-n)
}else{
char_pos <- substring(wrd, first = 1, last = n)
char_neg <- substring(wrd, first = n+1, last = nchar(wrd))
}
return(list("pos"=char_pos, "neg"=char_neg))
}
### Find columns with repeated variables:
#' This is done in a loop, check older version for solution
#' without loop (it was less robust, that's why I changed)
### Preparations:
rnms_ord <- rnms[order(nchar(rnms), decreasing = TRUE)] # Ordered according to word length (in case one term is in another)
tvars.v <- NA
tvars.t <- NA
### Start loop:
for(j in 1:ncol(x)){ # iterate through columns
for(i in 1:length(rnms_ord)){ # Iterate through repidentifiers
### Check current colname:
exres <- extr_char(wrd = colnames(x)[j], n = nchar(rnms_ord[i]), atEnd = ind_atEnd)
if(exres$pos == rnms_ord[i]){
### Collect strings:
tvars.t[j] <- exres$pos
tvars.v[j] <- exres$neg
break() # Break for loop
}
}
}
### Indices of repeated measure vars:
tvr <- which(!is.na(tvars.v))
if(length(tvr)==0){stop('There are no repeated measurement variables. Perhaps search string is at end (ind_atEnd argument)?')}
### Remove NAs:
tvars.v <- tvars.v[!is.na(tvars.v)]
tvars.t <- tvars.t[!is.na(tvars.t)]
### Data frame to test names:
tvars.d <- data.frame(tvars.v, tvars.t)
### Turn indicator column to factor in order to include all factor levels:
tvars.d$tvars.t <- factor(tvars.d$tvars.t, levels=rnms)
### Check whether all variables are present:
if(!all(table(tvars.d)==1)){
if(!ignore_unbal){
print(table(tvars.d))
print('There is a problem with the naming of the repeated variables. Check returned table (printed above), should all be equal to 1. You can set the argument ignore_unbal to TRUE in order to continue (missing values will be filled with NAs).')
return(table(tvars.d))
}
}
#**********************************************
#' First prepare wide data. Make sure all repeated variables are of same type
#' and all the missing columns are added.
### Separate repeated and fixed variables:
dfix <- x[, -tvr, drop=FALSE]
drep <- x[, tvr, drop=FALSE]
### Initiate result object:
drepL <- list()
allcL <- list()
### Iterate through repeated variables:
tvars.vu <- unique(tvars.v) # Unique vars
for(i in 1:length(tvars.vu)){
### Set control variables:
trn2fac <- FALSE
### All theoretically expected columns:
allc <- if(ind_atEnd) {paste0(tvars.vu[i], rnms)} else {paste0(rnms, tvars.vu[i])}
allcL[[i]] <- allc # Store
### Data with actually available columns:
di <- drep[, colnames(drep)%in%allc, drop=FALSE]
### Not available columnnames:
c_noavail <- allc[!(allc%in%colnames(drep))]
### Check classes of columns:
### First check if only numeric:
allnum <- all(sapply(di, function(z) inherits(z, what = c('numeric', 'integer')) ))
if(!allnum){ # Need only to do something if not numeric
if(shw.mssg){
message(paste('The repeated variable ', tvars.vu[i], 'is not numeric or integer. The function will automatically turn such a variable into a character column in the long format data (with the exception of factors which are retained as factors).\n'))
}
### Check if not always the same class:
if(length(unique(sapply(di, function(z)class(z))))!=1){
warning(paste0('The variable ', tvars.vu[i]), ' has not the same class for all repeated occasions (e.g. timepoints). The function will turn this variable into a character column.\n')
### Next check if factor:
}else if(inherits(di[,1], what = 'factor')){ # Here all columns can only have same class (see if above)
### Note to turn to factor again:
trn2fac <- TRUE
### Check if all levels are the same:
lvls <- lapply(di, function(z)levels(z))
allsame <- all(sapply(lvls[-1], function(z) identical(z, lvls[[1]])))
### Get all levels:
allevels <- unique(do.call(c, lvls))
### Warning in case not all levels are the same:
if(!allsame){
warning(paste0('The variable ', tvars.vu[i], ' is a factor but not all repeated occasions (e.g. timepoints) have the same levels. The function will turn the variable in long format into a new factor with all the levels that were found in the different repeated occasions.\n'))
}
}
}
#' After the above block to check the available data and give appropriate
#' warnings, I now add the missing columns and make sure all columns are of
#' the same type.
### Add columns which are missing:
di[, c_noavail] <- NA
### Order columns:
di <- di[, match(allc, colnames(di))]
### In case of non numeric turn to character:
if(!allnum){
di <- data.frame(lapply(di, as.character))
### Check if factor:
if(trn2fac){
### Turn each column to factor with all found levels:
di <- data.frame(lapply(di, function(z) factor(z, levels = allevels) ))
}
}
### Store data:
drepL[[i]] <- di
}
### Merge cleaned data:
drep_clnd <- do.call(cbind, drepL)
#**********************************************
#' After preparing the data set we can easily turn them into long format.
### Initiate result object:
L <- list()
### Iterate through every single row and collect long format in list:
for(i in 1:nrow(x)){
### Prepare data:
tmp.f <- dfix[i, , drop=FALSE] # Fixed variables
tmp.v <- drep_clnd[i, , drop=FALSE] # Repeated measures variables
tmp <- tmp.f[rep(1, length(rnms)),, drop=FALSE] # Repeat row for each repeated measurement
tmp[, repColnm] <- rnms # Add rep-identifiers
### Iterate through repeated variables:
for(j in 1:length(tvars.vu)){
### Get the relevant columns:
clnms <- allcL[[j]]
### Iterate through repeated measures:
fillL <- list() # Initiate
for(k in 1:length(clnms)){
matchres <- match(clnms[k], colnames(tmp.v))
### Check if NA:
fill_k <- tmp.v[1, matchres]
fillL[[k]] <- fill_k
}
### Fill in:
tmp[, tvars.vu[j]] <- do.call(c, fillL) # This only works because of above preparation/cleaning of data
}
### Add to list:
L[[i]] <- tmp
### Progress:
if(verbose) checkprogress(val = i, endi = nrow(x))
}
### Put everything together:
d <- do.call(rbind, L)
### Turn rep-identifier to factor:
### Remove linking character:
if(!is.null(rmv_linkchar)){
if(ind_atEnd){
## Remove from beginning:
rnms_lab <- substring(rnms, first = 1+rmv_linkchar, last = nchar(rnms))
}else{
### Remove from end:
rnms_lab <- substring(rnms, first = 1, last = nchar(rnms)- rmv_linkchar)
}}else{rnms_lab <- rnms}
### Create factor:
d[, repColnm] <- factor(d[, repColnm], levels = rnms, labels = rnms_lab)
### Return object:
return(d)
}
#*********************************************************************************
# NICE NA PLOT ####
#*********************************************************************************
niceNaPlot <- function(x, IDvar=NULL, show_xlab=TRUE, forceUnaggr=FALSE,
nClust=100, critVal=10000, verbose = TRUE){
### First make sure that the names which I use to add as columns to the data, do not already exist:
stopifnot(all(!(c('dupIdentifierNiceNA_YR', 'kmclusterNiceNA_YR', 'withinClusterOrderNiceNA_YR', 'betweenClusterOrderNiceNA_YR') %in% colnames(x))))
### If supplied, add IDvar as rownames and then remove it:
if(is.null(IDvar)){
IDvar <- paste0('Obs', 1:nrow(x))
rownames(x) <- IDvar
}else{
### In case there are repetitions of identifier values (rownames must be unique):
if(length(unique(x[,IDvar])) != nrow(x)){
warning('There are repetitions of IDvar values. Will create unique values for the plot.')
x <- x[order(x[,IDvar]),] # Make sure identifier are ordered
id.rle <- rle(as.character(x[,IDvar]))
id.uni <- paste0(rep(id.rle$values, times = id.rle$lengths), "_",
unlist(lapply(id.rle$lengths, seq_len)))
}else{id.uni <- x[,IDvar]}
### Add as rownames:
rownames(x) <- id.uni
x[,IDvar] <- NULL
}
### Create NA table:
x <- data.frame(is.na(x)*1)
### Remove duplicates to make clustering more efficient:
### Get each row as one number:
x$dupIdentifierNiceNA_YR <- apply(x, 1, function(z) paste(z, collapse = '') )
x_nodup <- x[!duplicated(x), ]
#' In case of too many rows, R will crash due to the large distance matrix. Therefore,
#' need to enable an aggregated version of the hierarchical clustering.
### Check size of data:
if(nrow(x_nodup) > critVal & !forceUnaggr){
### Data is too large, first find clusters of data using kmeans:
message(paste0('The data has more than ', critVal, ' unique NA-patterns. Therefore, the data will first be aggregated using kmeans clustering (with ', nClust, ' clusters) on which finally hierarchical clustering will be performed. You can also force unaggregated clustering or change the numbers of kmeans-clusters (see helppage).\n'))
km <- kmeans(x_nodup[, -which(colnames(x_nodup)%in%c('dupIdentifierNiceNA_YR'))], centers=nClust, iter.max=1000)
### Add cluster number and turn to data frame:
x_kmns <- cbind(x_nodup, "kmclusterNiceNA_YR"=km$cluster)
### Split:
x_kmns_splt <- split(x_kmns, x_kmns$kmclusterNiceNA_YR)
### Order inside clusters:
x_kmns_splt_ord <- list()
for(i in 1:length(x_kmns_splt)){
### Apply hierarchical clustering:
z <- x_kmns_splt[[i]]
zred <- z[, -which(colnames(z)%in%c('dupIdentifierNiceNA_YR', 'kmclusterNiceNA_YR'))] # Remove not needed columns for clustering
### hierarchical clustering:
dis <- dist(zred)
hc <- hclust(dis, method = 'ward.D2')
z$withinClusterOrderNiceNA_YR <- hc$order
x_kmns_splt_ord[[i]] <- z
### Progress:
if(verbose) checkprogress(val = i, endi = length(x_kmns_splt))
}
### Merge:
x_kmnsmerg <- do.call(rbind, x_kmns_splt_ord)
### Order the cluster centers with hierarchical clustering:
dis <- dist(km$centers)
hc_cent <- hclust(dis, method='ward.D2')
### Merge:
x_kmnscntr <- data.frame('kmclusterNiceNA_YR'=rownames(km$centers), 'betweenClusterOrderNiceNA_YR'=hc_cent$order)
### Merge together:
xmrg_nodup <- merge(x_kmnsmerg, x_kmnscntr)
### Merge with data including duplications:
xmrg <- merge(x, xmrg_nodup)
### Reorder:
x_ord <- xmrg[order(xmrg$betweenClusterOrderNiceNA_YR, xmrg$withinClusterOrderNiceNA_YR),]
### Remove unwanted columns:
x_fin <- x_ord[, -which(colnames(x_ord)%in%c('dupIdentifierNiceNA_YR',
'kmclusterNiceNA_YR', 'withinClusterOrderNiceNA_YR',
'betweenClusterOrderNiceNA_YR'))]
}else{
### Apply hierarchical clustering of unaggregated data to find good ordering:
dis <- dist(x_nodup[, -which(colnames(x_nodup)%in%c('dupIdentifierNiceNA_YR')), drop=FALSE])
### Check if there are at least two unique NA-patterns:
if(length(dis)==0){
x_nodup$finalOrderNiceNA_YR <- 1
}else{
hc <- hclust(dis, method = 'ward.D2')
### Add order:
x_nodup$finalOrderNiceNA_YR <- hc$order
}
### Merge with data including duplications:
xmrg <- merge(x, x_nodup)
### Reorder:
x_ord <- xmrg[order(xmrg$finalOrderNiceNA_YR), ]
### Remove unwanted columns:
x_fin <- x_ord[, -which(colnames(x_ord)%in%c('dupIdentifierNiceNA_YR', 'finalOrderNiceNA_YR'))]
}
### Overwrite original data:
x <- as.matrix(x_fin)
### Create plot:
olmar <- newmar <- par('mar')
newmar[c(2,4)] <- newmar[c(2,4)]+4
par('mar'=newmar)
xx <- 1:nrow(x)
yy <- 1:ncol(x)
image(xx,yy,x,col=c('lightblue', 'darkgrey'), xaxt='n',
yaxt='n', xlab='', ylab = '')
### Set xlab:
if(show_xlab){
xlbs <- rownames(x)
}else{
xlbs <- rep('', nrow(x))
}
### Add axes:
axis(side = 1, at = 1:nrow(x), labels = xlbs, las=2)
axis(side = 2, at = 1:ncol(x), labels = colnames(x), las=1)
par(xpd=TRUE)
legend(x = par('usr')[2], y=par('usr')[4], legend = c('present', 'missing'),
pch=15, col=c('lightblue', 'darkgrey'), pt.cex=2, bg='grey90', box.lty = 'blank')
par('mar'=olmar)
par(xpd=FALSE) # Return to default
### In case one wants the ordered is.na-table:
silentReturn <- x
}
#*********************************************************************************
# LONG TO WIDE DATA FORMAT ####
#*********************************************************************************
longToWide <- function(x, IDvar, repColnm, repVars, colNmStr='', verbose.mssg=TRUE, verbose.warn=TRUE){
### Make sure IDvar and repColnm are factors:
stopifnot(is.factor(x[,repColnm]))
stopifnot(is.factor(x[,IDvar]))
### Remove cases with missing values in repColnm or IDvar:
nasm.rep <- sum(is.na(x[,repColnm]))
nasm.id <- sum(is.na(x[,IDvar]))
if(nasm.rep != 0){
if(verbose.warn) warning(paste0('There are ', nasm.rep,' missing value(s) in the ', repColnm ,' variable. Will remove these observations.'))
x <- x[!is.na(x[,repColnm]),]
}
if(nasm.id != 0){
if(verbose.warn) warning(paste0('There are ', nasm.id,' missing value(s) in the IDvar variable. Will remove these observations.'))
x <- x[!is.na(x[,IDvar]),]
}
### Check whether there are multiple entries for a repColnm level for one ID:
if(max(table(x[,IDvar], x[,repColnm])) > 1){
print(table(x[,IDvar], x[,repColnm]))
stop(paste0('There are multiple entries for a ', repColnm, ' level for a level of ', IDvar, '. Check the printed table to find out more.'))
}
### Get the levels of repColnm:
lvs <- levels(x[,repColnm])
### Split data according to observation-ID:
x.sp <- split(x, x[, IDvar])
### Loop through the IDs:
xwL <- list()
for(i in 1:length(x.sp)){
### Get data:
d <- x.sp[[i]]
### Check whether fixed variables have varying values:
dfi <- d[, !(colnames(d) %in% repVars) & colnames(d)!=repColnm]
if(!all(sapply(dfi, function(x){length(unique(x))==1}))){
if(verbose.warn) warning(paste0('There are varying values in presumably fixed variable(s) for observation-ID ', levels(x[,IDvar])[i],'. Will use the values of the first row.'))
}
### Check whether levels of repColnm are missing and adjust:
if(length(unique(d[,repColnm])) < length(lvs)){
### Add number of missing rows:
mss <- length(lvs) - length(unique(d[,repColnm]))
dd <- d[rep(1, mss),]
### Fill in appropriate values:
dd[, repVars] <- NA
dd[, repColnm] <- lvs[!lvs %in% unique(d[,repColnm])]
### Merge with data:
d <- rbind(d,dd)
### Give a message:
if(verbose.mssg) message(paste0('Observation-ID ', levels(x[,IDvar])[i], ' did not have rows for all levels of ', repColnm, '.\n'))
}
### Order according to repCol:
d <- d[order(d[,repColnm]),]
### Create the wide-format data:
dwf0 <- d[, !(colnames(d) %in% repVars) & colnames(d)!=repColnm & colnames(d)!=IDvar, drop=FALSE] # Only fixed variables
dwf <- cbind(d[,IDvar, drop=FALSE], dwf0)[1,,drop=FALSE] # Put IDvar in beginning
dwr <- d[, (colnames(d) %in% repVars), drop=FALSE] # Only repeated variables
### Iterate through the repVar columns:
djL <- list()
for(j in 1:ncol(dwr)){
### wide data format:
dj <- do.call(data.frame, as.list(dwr[,j]))
### Add appropriate colnames:
cn0 <- expand.grid(colnames(dwr)[j], lvs)
cn1 <- do.call(paste0, list(cn0[,2], colNmStr, cn0[,1]))
colnames(dj) <- cn1
djL[[j]] <- dj
}
### Combine data:
dw <- cbind(dwf, do.call(cbind, djL))
xwL[[i]] <- dw
}
### Merge into final data frame:
xw <- do.call(rbind, xwL)
return(xw)
}
#*********************************************************************************
# MAKE COLOR TRANSPARENT ####
#*********************************************************************************
mktransp <- function(color, alpha=100){
stopifnot(alpha >= 0 & alpha <= 255)
newCol<-col2rgb(color)
transCol <- apply(newCol, 2, function(x){rgb(red=x[1], green=x[2],
blue=x[3], alpha=alpha,
maxColorValue=255)})
return(transCol)
}
#*********************************************************************************
# ADD IMAGE TO PLOT ####
#*********************************************************************************
addImgToPlot <- function(filename, x=NULL, y = NULL, width = NULL, angle=0, interpolate = TRUE){
if(is.null(x) | is.null(y) | is.null(width)){stop("Must provide args 'x', 'y', and 'width'")}
### Turn image to array object:
if(grepl(pattern = '.png', filename)){
obj <- png::readPNG(source = filename)
}else if(grepl(pattern = '.jpg', filename)){
obj <- jpeg::readJPEG(source = filename)
}else{
stop('You must provide a filepath ending with .png or .jpg')
}
### Get some of the relevant measures:
USR <- par()$usr # A vector of the form c(x1, x2, y1, y2) giving the extremes of the user coordinates of the plotting region
PIN <- par()$pin # The current plot dimensions, (width, height), in inches
DIM <- dim(obj) # number of x-y pixels for the image
ARp <- DIM[1]/DIM[2] # pixel aspect ratio (y/x)
WIDi <- width/(USR[2]-USR[1])*PIN[1] # convert width units to inches
HEIi <- WIDi * ARp # height in inches
height <- HEIi/PIN[2]*(USR[4]-USR[3]) # height in units
#' Have to adapt position of center in case of rotation
#' because rotation is per default around bottom left corner.
### Angle of center relative to left bottom before rotation:
ang0 <- atan(HEIi/WIDi)*180/pi
### Distance from corner to center in inches:
z <- sqrt((HEIi/2)^2 + (WIDi/2)^2)
### Angle after rotation
ang1 <- ang0 + angle
### Coordinates of center relative to bottom-left corner after rotation:
### In inches:
xaR_inch <- z * cos(ang1 * pi /180)
yaR_inch <- z * sin(ang1 * pi /180)
### Convert to units:
xaR <- xaR_inch/PIN[1]*(USR[2]-USR[1])
yaR <- yaR_inch/PIN[2]*(USR[4]-USR[3])
### Coordinates of center relative to bottom-left corner before rotation:
xbR <- width/2
ybR <- height/2
### Shift of center through rotation:
xshft <- xbR - xaR
yshft <- ybR - yaR
### Draw image:
rasterImage(image = obj,
xleft = (x+xshft)-(width/2), xright = (x+xshft)+(width/2),
ybottom = (y+yshft)-(height/2), ytop = (y+yshft)+(height/2),
interpolate = interpolate, angle = angle)
}
#*********************************************************************************
# PREVALENCE TABLE ####
#*********************************************************************************
prevTabl <- function(X, FUN, catVar=NULL, atLeastOnce=FALSE){
### Create default catVar:
if(is.null(catVar)){
catVar <- factor(rep('', nrow(X)))
}
stopifnot(is.factor(catVar)) # Must be a factor
### Split data according to groups:
Xsp <- split(X, f = catVar)
### Apply function to data:
tfTbl <- lapply(Xsp, FUN = FUN)
### Count successes:
xs <- lapply(tfTbl, FUN = function(x){apply(x, 2, sum, na.rm=TRUE)})
xstbl <- do.call(rbind, xs)
### Number of entries:
xn <- lapply(tfTbl, FUN=function(x){apply(x, 2, function(y){sum(!is.na(y), na.rm = TRUE)})})
xntbl <- do.call(rbind, xn)
### Calculate the rates:
maprt <- Map(f = function(xx,nn){xx/nn}, xx=xs, nn=xn)
xrat <- do.call(rbind, maprt) # rbind results
### Calculate "onceTrue" column:
### Count successes:
xonce_tf <- lapply(tfTbl, FUN = function(x){apply(x,1, any, na.rm=TRUE)})
xonce_s <- lapply(xonce_tf, sum, na.rm=TRUE)
xonce_stbl <- do.call(rbind, xonce_s)
### Number of entries:
xonce_n <- lapply(xonce_tf, function(x){sum(!is.na(x))})
xonce_ntbl <- do.call(rbind, xonce_n)
### Calculate rate:
xonce_maprt <- Map(f = function(xx,nn){xx/nn}, xx=xonce_s, nn=xonce_n)
xonce_rat <- do.call(rbind, xonce_maprt)
colnames(xonce_rat) <- 'atLeastOnce'
### Create final table:
mapfin <- Map(f = function(x,n){paste0(x, '/', n, ' (', round(x/n*100, 1),'%)')}, x=xs, n=xn)
fintbl <- noquote(do.call(rbind, mapfin))
colnames(fintbl) <- colnames(X)
### Add "onceTrue" column:
if(atLeastOnce){
### Generate final output:
maponcefin <- Map(f = function(x,n){paste0(x, '/', n, ' (', round(x/n*100, 1),'%)')}, x=xonce_s, n=xonce_n)
oncefintbl <- noquote(do.call(rbind, maponcefin))
colnames(oncefintbl) <- colnames(xonce_rat)
### Add to final table:
fintbl <- noquote(cbind(fintbl, oncefintbl))
}
### Collect output:
if(atLeastOnce){
L <- list(xs=xstbl, xn=xntbl, xrat=xrat, xonce_s=xonce_stbl,
xonce_n=xonce_ntbl, xonce_rat=xonce_rat, fintbl=fintbl)
}else{
L <- list(xs=xstbl, xn=xntbl, xrat=xrat, fintbl=fintbl)
}
### Print final table:
print(fintbl)
### Silent return of result-list:
invisible(L)
}
#*********************************************************************************
# STANDARDIZE DATA TABLE ####
#*********************************************************************************
standizDat <- function(x, chngName = TRUE, onlyCenter = FALSE){
### Change colnames of data:
if(chngName){
tag <- ifelse(onlyCenter, '_cntr', '_stnd')
colnames(x)[sapply(x, is.numeric)] <- paste0(colnames(x)[sapply(x, is.numeric)],
tag)
}
### Function to be used in lapply:
lapp_f <- function(y){
if(is.numeric(y)){
rval <- scale(y, scale = !onlyCenter)
} else{
rval <- y
}
return(rval)
}
### Lapply call:
lap_res <- lapply(x, lapp_f)
### combine to dataframe:
rval <- do.call(data.frame, lap_res)
return(rval)
}
#*********************************************************************************
# REMOVE OUTLIERS FROM DATA ####
#*********************************************************************************
rmOutliers <- function(x, chngName = TRUE){
### Change colnames of data:
if(chngName){
colnames(x)[sapply(x, is.numeric)] <- paste0(colnames(x)[sapply(x, is.numeric)],
'_noOutl')
}
### Function to be used in lapply:
lapp_f <- function(y){
if(is.numeric(y)){
### Find outliers with boxplot func:
outl <- boxplot.stats(y)$out
outl_id <- which(y %in% outl)
y[outl_id] <- NA
rval <- y
} else{
rval <- y
}
return(rval)
}
### Lapply call:
lap_res <- lapply(x, lapp_f)
### combine to dataframe:
rval <- do.call(data.frame, lap_res)
return(rval)
}
#*********************************************************************************
# PROGRESS BAR FOR LOOP ####
#*********************************************************************************
checkprogress <- function(val, endi, starti = 1){
setTxtProgressBar(txtProgressBar(min=starti-1, max=endi, style = 3), value = val)
}
#*********************************************************************************
# FLATTEN RECURSIVE LIST ####
#*********************************************************************************
#' Taken from internet
#' (https://stackoverflow.com/questions/47603578/flatten-recursive-list)
flatten <- function(x) {
if (!inherits(x, "list")) return(list(x))
else return(unlist(c(lapply(x, flatten)), recursive = FALSE))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.