##------------------------------------------------------------------------
## modified Venn diagram plotting function based on the function in ABarray
my.drawVennDiagram <- function (object, names, mar = rep(0.5, 4), cex = 1, ...)
{
## colors <- c("blue", "cyan", "orange")
colors <- rep("black",3)
if (!is(object, "VennCounts"))
object <- vennCounts(object)
nsets <- ncol(object) - 1
if (nsets > 3)
stop("Can't plot Venn diagram for more than 3 sets")
if (missing(names))
names <- colnames(object)[1:nsets]
counts <- object[, "Counts"]
totalCounts <- sum(counts)
theta <- 2 * pi * (1:360)/360
xcentres <- list(0, c(-1, 1), c(-1, 1, 0))[[nsets]]
ycentres <- list(0, c(0, 0), c(1/sqrt(5), 1/sqrt(5), -2/sqrt(5)))[[nsets]]
r <- c(1.6, 1.6, 1.6)[nsets]
xtext <- list(-1.2, c(-1.2, 1.2), c(-1.2, 1.2, 0))[[nsets]]
ytext <- list(1.8, c(1.8, 1.8), c(2.4, 2.4, -3))[[nsets]]
old.par <- par(mar = mar)
on.exit(par(old.par))
plot(x = 0, y = 0, type = "n", xlim = c(-4, 4), ylim = c(-4,
4), xlab = "", ylab = "", axes = FALSE, ...)
for (circle in 1:nsets) {
lines(xcentres[circle] + r * cos(theta), ycentres[circle] +
r * sin(theta), col = colors[circle], lwd = 3)
text(xtext[circle], ytext[circle], names[circle], cex = cex)
}
switch(nsets, {
rect(-3, -2.5, 3, 2.5)
text(2.3, -2.1, totalCounts, cex = cex)
text(0, 0, counts[2], cex = cex)
}, {
rect(-3, -2.5, 3, 2.5)
text(2.3, -2.1, totalCounts, cex = cex)
text(1.5, 0.1, counts[2], cex = cex)
text(-1.5, 0.1, counts[3], cex = cex)
text(0, 0.1, counts[4], cex = cex)
}, {
rect(-3, -3.5, 3, 3.3)
text(2.5, -3, totalCounts, cex = cex)
text(0, -1.7, counts[2], cex = cex)
text(1.5, 1, counts[3], cex = cex)
text(0.75, -0.35, counts[4], cex = cex)
text(-1.5, 1, counts[5], cex = cex)
text(-0.75, -0.35, counts[6], cex = cex)
text(0, 0.9, counts[7], cex = cex)
text(0, 0, counts[8], cex = cex)
})
invisible()
}
my.doVennDiagram <- function (a, b, c = NULL, names, ...)
{
require(limma)
if (is.null(c)) {
list.all <- union(a, b)
list.mat <- matrix(0, nrow = length(list.all), ncol = 2)
colnames(list.mat) <- c("list1", "list2")
for (i in 1:length(list.all)) {
list.mat[i, 1] <- list.all[i] %in% a
list.mat[i, 2] <- list.all[i] %in% b
}
list.venn <- vennCounts(list.mat)
my.drawVennDiagram(list.venn, names = names, cex = 1.5, ...)
ab <- intersect(which(list.mat[, 1] == 1),
which(list.mat[, 2] == 1))
list.ab <- vector(length = length(list.all))
list.ab[ab] <- TRUE
fileName <- "Venn_list_1+2.csv"
if (!missing(names)) {
fileName <- paste("Venn", names[1], names[2], ".csv",
sep = "_")
}
write.table(list.all[list.ab], file = fileName, sep = ",")
print(paste("List information is written to file", fileName))
invisible(list.all[list.ab])
}
else {
list.all <- union(a, union(b, c))
list.mat <- matrix(0, nrow = length(list.all), ncol = 3)
colnames(list.mat) <- c("list1", "list2", "list3")
for (i in 1:length(list.all)) {
list.mat[i, 1] <- list.all[i] %in% a
list.mat[i, 2] <- list.all[i] %in% b
list.mat[i, 3] <- list.all[i] %in% c
}
list.venn <- vennCounts(list.mat)
my.drawVennDiagram(list.venn, names = names, cex = 1.5, ...)
ab <- intersect(which(list.mat[, 1] == 1), which(list.mat[,
2] == 1))
ac <- intersect(which(list.mat[, 1] == 1), which(list.mat[,
3] == 1))
bc <- intersect(which(list.mat[, 2] == 1), which(list.mat[,
3] == 1))
abc <- intersect(which(list.mat[, 1] == 1), bc)
vd <- matrix(0, nrow = length(list.all), ncol = 4)
rownames(vd) <- list.all
vd[, 1][ab] <- 1
vd[, 2][ac] <- 1
vd[, 3][bc] <- 1
vd[, 4][abc] <- 1
colnames(vd) <- c("list 1_2", "list 1_3", "list 2_3",
"list 1_2_3")
if (!missing(names)) {
colnames(vd) <- c(paste(names[1], names[2], sep = "+"),
paste(names[1], names[3], sep = "+"), paste(names[2],
names[3], sep = "+"), paste(names[1], names[2],
names[3], sep = "+"))
}
fileName <- "Venn_list_1+2+3.csv"
if (!missing(names)) {
fileName <- paste("Venn", names[1], names[2], names[3],
".csv", sep = "_")
}
write.table(vd, file = fileName, sep = ",", col.names = NA)
print(paste("List information is written to file", fileName))
invisible(vd)
}
}
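##
## Example (illustrative; the identifier sets below are made up):
## library(limma)
## set.a <- paste0("gene", 1:20)
## set.b <- paste0("gene", 11:30)
## set.c <- paste0("gene", c(1:5, 25:35))
## vd <- my.doVennDiagram(set.a, set.b, set.c, names=c("A", "B", "C"))
## ## side effect: the overlap lists are written to "Venn_A_B_C_.csv"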
##----------------------------------------------------------------------
## a wrapper function for obtaining p-values for the fixed-effects estimates
## from lmer, based on likelihood ratio tests. This function was written by
## Christopher Moore:
## http://blog.lib.umn.edu/moor0554/canoemoore/2010/09/lmer_p-values_lrt.html
## Example:
## library(lme4)
## library(SASmixed)
## lmer.out <- lmer(strength ~ Program * Time + (Time|Subj), data=Weights)
## p.values.lmer(lmer.out)
p.values.lmer <- function(x) {
summary.model <- summary(x)
data.lmer <- data.frame(model.matrix(x))
names(data.lmer) <- names(fixef(x))
names(data.lmer) <- gsub(pattern=":", x=names(data.lmer),
replacement=".", fixed=T)
names(data.lmer) <- ifelse(names(data.lmer)=="(Intercept)",
"Intercept", names(data.lmer))
string.call <- strsplit(x=as.character(x@call), split=" + (", fixed=T)
var.dep <- unlist(strsplit(x=unlist(string.call)[2], " ~ ", fixed=T))[1]
vars.fixef <- names(data.lmer)
formula.ranef <- paste("+ (", string.call[[2]][-1], sep="")
formula.ranef <- paste(formula.ranef, collapse=" ")
formula.full <- as.formula(paste(var.dep, "~ -1 +",
paste(vars.fixef, collapse=" + "),
formula.ranef))
data.ranef <- data.frame(x@frame[,
which(names(x@frame) %in% names(ranef(x)))])
names(data.ranef) <- names(ranef(x))
data.lmer <- data.frame(x@frame[, 1], data.lmer, data.ranef)
names(data.lmer)[1] <- var.dep
out.full <- lmer(formula.full, data=data.lmer, REML=F)
p.value.LRT <- vector(length=length(vars.fixef))
for(i in 1:length(vars.fixef)) {
formula.reduced <- as.formula(paste(var.dep, "~ -1 +",
paste(vars.fixef[-i],
collapse=" + "),
formula.ranef))
out.reduced <- lmer(formula.reduced, data=data.lmer, REML=F)
    print(paste("Reduced by:", vars.fixef[i]))
    out.LRT <- anova(out.full, out.reduced)
    print(data.frame(out.LRT))
    ## pick the LRT p-value by column name so this works across lme4 versions
    p.value.LRT[i] <- round(out.LRT[2, "Pr(>Chisq)"], 3)
}
## lme4 >= 1.0 no longer exposes @coefs/@methTitle on the summary object,
## so print the summary and the LRT p-values side by side instead
print(summary.model)
cat("p-values from comparing nested models fit by maximum likelihood:\n")
print(cbind(coef(summary.model), p.value.LRT))
invisible(p.value.LRT)
}
##---------------------------------------------------------------
## The following code is used for obtaining MCMC p-values for MCMC samples
## from an lmer object. It was provided by Dr. Douglas Bates:
## http://rwiki.sciviews.org/doku.php?id=guides:lmer-tests
mcmcpvalue <- function(samp)
{
## elementary version that creates an empirical p-value for the
## hypothesis that the columns of samp have mean zero versus a
## general multivariate distribution with elliptical contours.
## differences from the mean standardized by the observed
## variance-covariance factor
std <- backsolve(chol(var(samp)),
cbind(0, t(samp)) - colMeans(samp),
transpose = TRUE)
sqdist <- colSums(std * std)
sum(sqdist[-1] > sqdist[1])/nrow(samp)
}
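##
## Example (illustrative): empirical p-value that two coefficients are
## jointly zero, using simulated draws in place of posterior samples:
## set.seed(1)
## samp <- matrix(rnorm(2000, mean=0.05, sd=1), ncol=2)
## mcmcpvalue(samp)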
##------------------------------------------------------------------------
## The following function is used to merge multiple Metabolon metabolomics
## data sets
fmet.merge <- function(datnames=dat.names, cols.ann, by.col="COMP_ID",
names.ids,
names.mets,
mets.start){
comps <- nc.mets <- NULL
nc.ann <- length(cols.ann)
n.dat <- length(datnames)
for (i in 1:n.dat){
x <- get(datnames[i])
nc.mets <- c(nc.mets, dim(x)[2]-mets.start[i]+1)
cols.na <- cols.ann[is.na(match(cols.ann, names(x)))]
if(length(cols.na)>0){
      x.add <- data.frame(matrix(NA, nrow=dim(x)[1], ncol=length(cols.na)))
dimnames(x.add)[[2]] <- cols.na
x <- data.frame(x.add,x)
}
comps <- rbind(comps,x[,cols.ann])
}
comp.ann <- apply(comps, 2, function(x){
tapply(x, as.vector(comps[, by.col]), function(y)
ifelse(any(!is.na(y)),y[!is.na(y)][1], NA))})
dat.mets <- data.frame(matrix(NA,nrow=dim(comp.ann)[1], ncol=sum(nc.mets)))
dimnames(dat.mets)[[2]] <- names.mets
nc.end <- cumsum(nc.mets)
nc.start <- c(1, nc.end[-length(nc.end)]+1)
dat.id <- data.frame(matrix(0,nrow=dim(comp.ann)[1], ncol=n.dat))
dimnames(dat.id)[[2]] <- names.ids
for( i in 1:n.dat){
x <- get(datnames[i])
id <- match(x[,by.col], as.vector(comp.ann[,by.col]))
dat.id[id,i] <- 1
dat.mets[id, nc.start[i]:nc.end[i]] <- x[,mets.start[i]:dim(x)[2]]
}
dat.all <- data.frame(dat.id, comp.ann, dat.mets)
return(dat.all)
}
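##
## Example (illustrative; toy Metabolon-style tables with annotation columns
## followed by per-sample metabolite columns):
## d1 <- data.frame(COMP_ID=c("c1","c2"), BIOCHEMICAL=c("ala","gly"),
##                  S1=c(1,2), S2=c(3,4))
## d2 <- data.frame(COMP_ID=c("c2","c3"), BIOCHEMICAL=c("gly","ser"),
##                  S3=c(5,6))
## fmet.merge(datnames=c("d1","d2"), cols.ann=c("COMP_ID","BIOCHEMICAL"),
##            by.col="COMP_ID", names.ids=c("in.d1","in.d2"),
##            names.mets=c("S1","S2","S3"), mets.start=c(3,3))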
##-------------------------------------------------------------
## Power and sample size calculation based on the log-rank test
##
fpower.logrank <- function(n=NULL, p, hr, sig.level=0.05, power=0.8){
  ## this function calculates the power or the required number of events
  ## based on the log-rank test for survival data
  ## n         = number of events (supply it to compute the achieved power)
  ## p         = proportion of subjects in the experimental group
  ## hr        = hazard ratio
  ## sig.level = two-sided significance level
  ## power     = desired power (used when n is not supplied)
  ## Power for a log-rank test is the same as power for a Cox model.
  ## For a 2-arm study
  ##   d = (qnorm(.975) + qnorm(.85))^2 / (.2 *.8 * coef^2)
  ##   d = # deaths required
  ##   .975 = two sided alpha=.05
  ##   .85 = power of .85
  ##   .2, .8 = proportion in each group
  ##   coef = Cox model coef you want power against.
  ## So for a 50% difference in hazard rates (or median survival times)
  ## coef = log(1.5).
  ## For a 50% change I get d=341. Now for the hard part of sample size
  ## in a survival study: how many people do you need to enroll, and how
  ## long will you need to follow them, to observe 341 total deaths? This
  ## second step is usually a mix of prior knowledge, enrollment expectations,
  ## and wild speculation. In the words of a prior director of research at
  ## U of Rochester: "At the commencement of a study the incidence of the
  ## disease in question will drop by one half, and will not return to its
  ## former levels until the study ends or the principal investigator retires,
  ## whichever comes first." L.L.
  ## -- Terry Therneau
  if (is.null(n)) {
    ## required number of events (Schoenfeld's formula)
    return(c(n.events=(qnorm(1 - sig.level/2) + qnorm(power))^2/
                      (p*(1 - p)*log(hr)^2)))
  }
  ## power achieved with n observed events
  c(power=pnorm(sqrt(n*p*(1 - p))*abs(log(hr)) - qnorm(1 - sig.level/2)))
}
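##
## Example: reproduces the d = 341 events quoted in the comments above
## (two-sided alpha = .05, power = .85, 20/80 allocation, hr = 1.5):
## fpower.logrank(p=0.2, hr=1.5, sig.level=0.05, power=0.85)
## ## power achieved with, say, 250 observed events:
## fpower.logrank(n=250, p=0.2, hr=1.5, sig.level=0.05)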
## Do power calculation of Freedman for Cox PH as set forth on page 733
## of 5th edition of Rosner, "Fundamentals of Biostatistics"
##
## n.1 = # subjects in exposed group
## n.2 = # subjects in control group
## t = max time of follow-up
## RR = (hazard rate for exposed group) /
## (hazard rate for control group)
## lambda[j] = Rosner's lambda_(j-1)
## delta[j] = Rosner's delta_(j-1)
##
survivalUtil.power <- function(n.1, n.2, t, RR, lambda, delta, alpha=0.05) {
pf <- survivalUtil.probFail(t, RR, lambda, delta)
k <- n.1/n.2
m <- n.1*pf$p.E + n.2*pf$p.C
pnorm(sqrt(k*m)*abs(RR - 1)/(k*RR + 1) - qnorm(1 - alpha/2))
}
##
## Do sample-size calculation of Freedman for Cox PH as set forth
## on page 735
## of 5th edition of Rosner, "Fundamentals of Biostatistics"
##
## k = ratio of (# subjects in exposed group) to
## (# subjects in control)
## t = max time of follow-up
## RR = (hazard rate for exposed group) /
## (hazard rate for control group)
## lambda[j] = Rosner's lambda_(j-1)
## delta[j] = Rosner's delta_(j-1)
##
## Value: n1 - number of people needed in exposed group
## n2 - number of people needed in control group
##
survivalUtil.sampSize <- function(k, t, RR, lambda, delta,
alpha=0.05, beta=.2) {
m <- ((1/k)*(((k*RR + 1)/(RR - 1))^2)*
((qnorm(1 - (alpha/2)) + qnorm(1 - beta))^2))
pf <- survivalUtil.probFail(t, RR, lambda, delta)
n.1 <- (m*k)/((k*pf$p.E) + pf$p.C)
n.2 <- m/((k*pf$p.E) + pf$p.C)
list(n.1=n.1, n.2=n.2)
}
##
## Compute probabilities of failure used by survivalUtil.power() and
## survivalUtil.sampSize()
##
survivalUtil.probFail <- function(t, RR, lambda, delta) {
A <- cumprod(1 - lambda)[1:t]
B <- cumprod(1 - (RR*lambda))[1:t]
C <- cumprod(1 - delta)[1:t]
D <- lambda[2:(t + 1)]*A*C
E <- RR*lambda[2:(t + 1)]*B*C
list(p.C=sum(D), p.E=sum(E))
}
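##
## Example (illustrative hazards; lambda must have length t + 1 because
## survivalUtil.probFail() indexes lambda[2:(t + 1)]):
## lam <- c(0.05, 0.06, 0.07, 0.08)  # control-group hazards, intervals 0..3
## del <- rep(0.02, 4)               # censoring probabilities, intervals 0..3
## survivalUtil.sampSize(k=1, t=3, RR=2, lambda=lam, delta=del)
## survivalUtil.power(n.1=200, n.2=200, t=3, RR=2, lambda=lam, delta=del)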
##------------------------------------------------------------------
## Sample size calculation for diagnostic tests based on Pepe MS (2003),
## The Statistical Evaluation of Medical Tests for Classification and
## Prediction, Oxford University Press, New York.
fn.dtest <- function(d0.t, d1.t, d0.f, d1.f, TPFb, FPFb, alpha, beta, paired){
  ## This function calculates the sample size needed for comparing two
  ## diagnostic tests when a gold standard is available
if(paired){
TPPF <- (1+d1.t)*TPFb-1
FPPF <- (1+d1.f)*FPFb-1
if(TPPF<0) TPPF <- 0
if(FPPF<0) FPPF <- 0
##number of cases
n.d <- ((qnorm(sqrt(1-alpha))+qnorm(sqrt(1-beta)))/log(d1.t/d0.t))^2*
(((d1.t+1)*TPFb-2*TPPF)/(d1.t*TPFb^2))
## number of controls needed
n.nd <- ((qnorm(sqrt(1-alpha))+qnorm(sqrt(1-beta)))/log(d1.f/d0.f))^2*
(((d1.f+1)*FPFb-2*FPPF)/(d1.f*FPFb^2))
}
  else{
    ## number of cases needed
    n.d <- ((qnorm(sqrt(1-alpha))+qnorm(sqrt(1-beta)))/log(d1.t/d0.t))^2*
      ((d1.t+1-2*d1.t*TPFb)/(d1.t*TPFb))
## number of controls needed
n.nd <- ((qnorm(sqrt(1-alpha))+qnorm(sqrt(1-beta)))/log(d1.f/d0.f))^2*
((d1.f+1-2*d1.f*FPFb)/(d1.f*FPFb))
}
return(c(n.d=n.d, n.nd=n.nd))
}
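##
## Example (illustrative; following Pepe's notation, d0.t/d1.t are taken
## here as the null and alternative relative true-positive fractions of the
## two tests, d0.f/d1.f the corresponding relative false-positive fractions,
## and TPFb/FPFb the baseline fractions of the reference test):
## fn.dtest(d0.t=1.0, d1.t=1.3, d0.f=1.0, d1.f=1.3,
##          TPFb=0.7, FPFb=0.1, alpha=0.05, beta=0.1, paired=TRUE)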
##--------------------------------------------------------------
## Bayesian stopping rules for phase II trials with binary outcomes
##---------------------------------------------------------------
fp.tox <- function(r0=0.1, a0, b0, n.max=40){
  ## this function calculates the posterior probability that the toxicity
  ## rate exceeds the prespecified level r0, for each possible number of
  ## observed toxicities at every sample size up to n.max; the prior belief
  ## about the toxicity rate is specified as a beta distribution, beta(a0, b0)
smry.prior <- list(mean=a0/(a0+b0),
W.90=c("5%"=qbeta(0.05,a0,b0),
"95%"=qbeta(0.95,a0,b0)))
smry.post <- vector(length=n.max, mode="list")
for(i in 1:n.max){
x <- 0:i
smry.post[[i]] <- 1-pbeta(r0, a0+x, b0+i-x)
names(smry.post[[i]]) <- as.character(0:i)
}
return(list(smry.prior=smry.prior, smry.post=smry.post))
}
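##
## Example: a beta(1, 9) prior (mean toxicity 0.1); smry.post[[6]] gives,
## for 0..6 observed toxicities among 6 patients, the posterior probability
## that the toxicity rate exceeds r0 = 0.1:
## out <- fp.tox(r0=0.1, a0=1, b0=9, n.max=10)
## out$smry.prior
## out$smry.post[[6]]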