Nothing
plotCont <- function(data.A, data.B, xlab.A, xlab.B=NULL, w.A=NULL, w.B=NULL,
type="density", ref=FALSE){
###################################################
hist.bks <- function(x, w=NULL, n=NULL){
xx <- x[!is.na(x)]
if(is.null(w)) {
# iqr.x <- IQR(xx)
qq <- quantile(x = xx, probs = c(0.25, 0.5, 0.75))
deff.uw <- 1
}
else{
ww <- w[!is.na(x)]
qq <- Hmisc::wtd.quantile(x = xx, weights = ww,
probs = c(0.25, 0.5, 0.75))
deff.uw <- length(ww) * sum(ww^2)/(sum(ww)^2)
}
iqr.x <- c(qq[3] - qq[1])
if(is.null(n)) nn <- length(xx) / deff.uw
else nn <- n
wid.FD <- 2 * iqr.x* 1/(nn^(1/3))
# robust boxplot fences
low <- max(min(xx), qq[1] - 1.5*2*(qq[2]-qq[1]) )
up <- min(max(xx), qq[3] + 1.5*2*(qq[3]-qq[2]) )
bins <- (up - low)/wid.FD
bins <- floor(bins)+1
if(bins==1){
bins <- 2
wid.FD <- (max(xx) - min(xx))/bins
}
# adjust for low and up
span <- bins*wid.FD
dd <- span - (up - low)
low <- low - dd/2
up <- up + dd/2
# final breaks
seq(from=low, to=up, by=wid.FD)
}
#################################################################
# preparation
nA <- nrow(data.A)
nB <- nrow(data.B)
n <- min(nA, nB)
if(is.null(xlab.B)) xlab.B <- xlab.A
if(xlab.A==xlab.B) pr.lab <- xlab.A
else pr.lab <- "x"
xA <- data.A[, xlab.A]
xB <- data.B[, xlab.B]
# note: weights are scaled to sum to n
if(!is.null(w.A)) ww.A <- data.A[, w.A]/sum(data.A[, w.A]) * nA
else ww.A <- rep(1, nA)
deff.uwA <- length(ww.A) * sum(ww.A^2)/(sum(ww.A)^2)
if(!is.null(w.B)) ww.B <- data.B[, w.B]/sum(data.B[, w.B]) * nB
else ww.B <- rep(1, nB)
deff.uwB <- length(ww.B) * sum(ww.B^2)/(sum(ww.B)^2)
############################################
# histograms & density plot comparing two distributions
if(type=="hist" | type=="density"){
if(ref) {
bks <- hist.bks(x=xB, w=ww.B, n=min(nA/deff.uwA, nB/deff.uwB))
if(bks[1] > min(xA)) bks[1] <- min(xA)
if(bks[length(bks)] < max(xA)) bks[length(bks)] <- max(xA)
}
else bks <- hist.bks(x=c(xA, xB), w=c(ww.A, ww.B), min(nA/deff.uwA, nB/deff.uwB))
#cat(bks, fill=T)
midp <- 0.5 * (bks[1:(length(bks)-1)] + bks[2:length(bks)])
#cat(midp, fill=T)
cxA <- cut(x = xA, breaks = bks, include.lowest = TRUE)
cxB <- cut(x = xB, breaks = bks, include.lowest = TRUE)
# tables
tA <- prop.table(table(cxA))
if(!is.null(w.A)) tA <- prop.table(xtabs(data.A[,w.A]~cxA))
tB <- prop.table(table(cxB))
if(!is.null(w.B)) tB <- prop.table(xtabs(data.B[,w.B]~cxB))
if(type=="hist"){
dfA <- cbind(data.frame(tA), sample="A")
dfB <- cbind(data.frame(tB), sample="B")
colnames(dfA) <- colnames(dfB) <- c("Var1", "Freq", "sample")
df <- rbind(dfA, dfB)
colnames(df) <- c("Var1", "Freq", "sample")
# calculates tot. var dist.
tvd <- 100*(1/2)*sum(abs(tA-tB))
labtvd <- paste("tvd ",
paste(round(tvd, 2), "%", sep=" "),
sep="= ")
btmlab <- paste(pr.lab, labtvd, sep=", ")
out <- ggplot(data = df,
aes(x = .data$Var1, y = .data$Freq,
fill = .data$sample)) +
geom_bar(stat = "identity", position = "dodge") +
labs(x = btmlab, y="rel freq")
}
if(type=="density"){
dfxA = data.frame(x=midp, sample="A")
dfxB = data.frame(x=midp, sample="B")
xx <- rbind(dfxA, dfxB)
colnames(xx) <- c(pr.lab, "sample")
xx$w <- c(tA, tB)
out <- ggplot2::ggplot(xx, aes(xx[ ,pr.lab], weight=.data$w,
colour=.data$sample)) +
geom_density(alpha=0.4, lwd=0.8, adjust=0.5) +
labs(x = pr.lab)
}
}
###########################################
if(type=="ecdf"){
if(ref) usx <- unique(sort(xB))
else usx <- unique(sort(c(xA ,xB)))
k <- length(usx)
ecdf.xA <- ecdf.xB <- numeric(k)
for(i in 1:k){
ecdf.xA[i] <- sum(ww.A[xA <= usx[i]])
ecdf.xB[i] <- sum(ww.B[xB <= usx[i]])
}
dfxA = data.frame(x=usx, ecdf=ecdf.xA, sample="A")
dfxB = data.frame(x=usx, ecdf=ecdf.xB, sample="B")
xx <- rbind(dfxA, dfxB)
out <- ggplot2::ggplot(xx, aes(x=.data$x, y=.data$ecdf,
color=.data$sample)) +
geom_point(size=0.5) +
geom_line() +
xlab(label = xlab.A) +
ylab(label = "Empirical cumulative density function")
}
##############################################
# comparison of empirical quantiles
if(type=="qqplot" | type=="qqshift"){
# decision concerning quantiles
if(n <= 50) pctp <- c(0.25, 0.50, 0.75)
if(n > 50 & n <= 150) pctp <- seq(from = 0.2, to = 0.8, by = 0.2)
if(n > 150 & n <= 250) pctp <- seq(from = 0.1, to = 0.9, by = 0.1)
if(n > 250) pctp <- seq(from = 0.05, to = 0.95, by = 0.05)
# derive empirical quantiles
qA <- quantile(x = xA, probs = pctp)
if(!is.null(w.A)) {
qA <- Hmisc::wtd.quantile(x=xA,
weights=data.A[, w.A],
probs=pctp)
}
qB <- quantile(x = xB, probs = pctp)
if(!is.null(w.B)) {
qB <- Hmisc::wtd.quantile(x=xB,
weights=data.B[, w.B],
probs=pctp)
}
if(type=="qqplot"){
qq <- data.frame(qA=qA, qB=qB)
#colnames(qq) <- paste(xlab, c("A","B"), sep=".")
out <- ggplot2::ggplot(qq, aes(x=qB, y=qA)) +
geom_point(shape=19, color="orangered1", size=3) +
geom_abline(intercept = 0, slope = 1, color="blue", size=1.5,
linetype="dashed") +
xlab(label = paste("Quantiles", pr.lab, "B", sep=" ")) +
ylab(label = paste("Quantiles", pr.lab, "A", sep=" "))
}
#############################################################
# shift between two empirical distributions
# i.e. scatterplot of difference between quantiles (Y)
# and quantiles in A (X)
if(type=="qqshift"){
qq <- data.frame(qA=qB, qB=qA-qB)
#colnames(qq) <- paste(xlab, c("A","B"), sep=".")
out <- ggplot(qq, aes(x=qA, y=qB)) +
geom_point(shape=19, color="orangered1", size=3) +
geom_hline(yintercept = 0, color="blue", size=1.5,
linetype="dashed") +
xlab(label = paste("Quantiles", pr.lab, "B", sep=" ")) +
ylab(label = "Quantiles.A - Quantiles.B")
}
}
###################################################################
#####################################################################
plot(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.