Nothing
ez.combat <- function(df,
batch.var,
adjust.var = "all",
exclude.var = NULL,
model = NULL,
output = c("overwrite", "append"),
error = c("abort", "continue"),
use.eb = TRUE,
verbose = FALSE) {
orig.f <- df # keep df in original state for output
# Gather and check variable list ---------------------------------------------
if (!is.null(model)) {
iv.ls <- unlist(strsplit(as.character(model), split="[~+*: ]"))
iv.ls <- iv.ls[-which(iv.ls == "")]
for (i in 1:length(iv.ls)) {
df[ ,iv.ls[i]] <- as.numeric(df[ ,iv.ls[i]])
}
} else {
iv.ls <- NULL
}
model <- switch(class(model),
`NULL` = NULL,
`character` = model <- model.matrix(as.formula(model), df),
`formula` = model <- model.matrix(as.formula(model), df),
otherwise = error("[EZ.COMBAT] Cannot parse model"))
if (adjust.var[1] == "all") {
if (!is.null(iv.ls)) {
dv.ls <- colnames(df)[!(colnames(df) %in% iv.ls)]
} else {
dv.ls <- colnames(df)
}
} else {
dv.ls <- adjust.var
}
dv.ls <- dv.ls[!dv.ls %in% batch.var]
if (!is.null(exclude.var)) {
dv.ls <- dv.ls[!dv.ls %in% exclude.var]
}
orig.dv.ls <- dv.ls
if (length(dv.ls) > 100 & output[1] == "append") {
warning("[EZ.COMBAT] Appending so many variables to an existing dataframe may take a long time, consider overiting output instead.")
}
# Convert df to dat ----------------------------------------------------------
dat <- t(as.matrix(df[ ,dv.ls]))
# Check and remove constant variables (or abort) -----------------------------
sd.chk <- apply(df[ ,dv.ls], 2, sd, na.rm=TRUE)
sd.chk <- sd.chk == 0
if (any(sd.chk)) {
if (error[1] == "continue") {
dv.ls <- dv.ls[-which(sd.chk)]
dat <- t(as.matrix(df[ ,dv.ls]))
if (verbose) {
warning("[EZ.COMBAT] some variables have been excluded as they are constant across samples.\n")
}
} else {
stop("[EZ.COMBAT] aborted: some variables are constant across samples.\n")
}
}
# display method option ------------------------------------------------------
if (use.eb & verbose) {
cat("[EZ.COMBAT] Performing ComBat with empirical Bayes\n")
} else if (verbose) {
cat("[EZ.COMBAT] Performing ComBat without empirical Bayes (L/S model)\n")
}
# make batch a factor and make a set of indicators for batch -----------------
batch <- as.factor(df[ , batch.var])
batchmod <- model.matrix(~-1+batch)
if (verbose) cat("[EZ.COMBAT] Found",nlevels(batch),'batches\n')
# A few other characteristics on the batches ---------------------------------
n.batch <- nlevels(batch)
batches <- list()
for (i in 1:n.batch){ batches[[i]] <- which(batch == levels(batch)[i]) } # list of samples in each batch
n.batches <- sapply(batches, length)
n.array <- sum(n.batches)
# combine batch variable and covariates --------------------------------------
design <- cbind(batchmod,model)
# check for intercept in covariates, and drop if present ---------------------
check <- apply(design, 2, function(x) all(x == 1))
design <- as.matrix(design[,!check])
# Number of covariates or covariate levels
if (verbose) {
cat("[EZ.COMBAT] Adjusting for",
ncol(design)-ncol(batchmod),
"covariate(s) or covariate level(s)\n")
}
# Check if the design is confounded ------------------------------------------
if(qr(design)$rank<ncol(design)){
if(ncol(design)==(n.batch+1)){
stop("[EZ.COMBAT] The covariate is confounded with batch. Remove the covariate and rerun ComBat.")
}
if(ncol(design)>(n.batch+1)){
if((qr(design[,-c(1:n.batch)])$rank<ncol(design[,-c(1:n.batch)]))){
stop('[EZ.COMBAT] The covariates are confounded. Please remove one or more of the covariates so the design is not confounded.')
} else {
stop("[EZ.COMBAT] At least one covariate is confounded with batch. Please remove confounded covariates and rerun ComBat.")
}
}
}
# Standardize Data across features -------------------------------------------
if (verbose) cat('[EZ.COMBAT] Standardizing Data across features\n')
B.hat <- solve(t(design)%*%design)%*%t(design)%*%t(as.matrix(dat))
# Standarization Model -------------------------------------------------------
grand.mean <- t(n.batches/n.array)%*%B.hat[1:n.batch,]
var.pooled <- ((dat-t(design%*%B.hat))^2)%*%rep(1/n.array,n.array)
stand.mean <- t(grand.mean)%*%t(rep(1,n.array))
if(!is.null(design)){
tmp <- design;tmp[,c(1:n.batch)] <- 0
stand.mean <- stand.mean+t(tmp%*%B.hat)
}
s.data <- (dat-stand.mean)/(sqrt(var.pooled)%*%t(rep(1,n.array)))
# Get regression batch effect parameters -------------------------------------
if (use.eb){
if (verbose) cat("[EZ.COMBAT] Fitting L/S model and finding priors\n")
} else {
if (verbose) cat("[EZ.COMBAT] Fitting L/S model\n")
}
batch.design <- design[,1:n.batch]
gamma.hat <- solve(t(batch.design)%*%batch.design)%*%t(batch.design)%*%t(as.matrix(s.data))
delta.hat <- NULL
for (i in batches){
delta.hat <- rbind(delta.hat,apply(s.data[,i], 1, var,na.rm=T))
}
# Empirical Bayes correction: -----------------------------------------------
gamma.star <- delta.star <- NULL
gamma.bar <- t2 <- a.prior <- b.prior <- NULL
if (use.eb){
##Find Priors
gamma.bar <- apply(gamma.hat, 1, mean)
t2 <- apply(gamma.hat, 1, var)
a.prior <- apply(delta.hat, 1, aprior)
b.prior <- apply(delta.hat, 1, bprior)
# Find use.eb batch adjustments
if (verbose) cat("[EZ.COMBAT] Finding parametric adjustments\n")
for (i in 1:n.batch){
temp <- it.sol(s.data[,batches[[i]]],gamma.hat[i,],delta.hat[i,],gamma.bar[i],t2[i],a.prior[i],b.prior[i])
gamma.star <- rbind(gamma.star,temp[1,])
delta.star <- rbind(delta.star,temp[2,])
}
}
### Normalize the Data ### -------------------------------------------------------
if (verbose) cat("[EZ.COMBAT] Adjusting the Data\n")
bayesdata <- s.data
j <- 1
for (i in batches){
if (use.eb){
bayesdata[,i] <- (bayesdata[,i]-t(batch.design[i,]%*%gamma.star))/(sqrt(delta.star[j,])%*%t(rep(1,n.batches[j])))
} else {
bayesdata[,i] <- (bayesdata[,i]-t(batch.design[i,]%*%gamma.hat))/(sqrt(delta.hat[j,])%*%t(rep(1,n.batches[j])))
}
j <- j+1
}
bayesdata <- (bayesdata*(sqrt(var.pooled)%*%t(rep(1,n.array))))+stand.mean
if (output[1] == "append") {
temp.df <- orig.f[ ,orig.dv.ls]
temp.df[ ,dv.ls] <- t(bayesdata)
new.df <- data.frame(orig.f, temp.df)
colnames(new.df) <- c(colnames(orig.f), paste0(orig.dv.ls, ".cb"))
} else if (output[1] == "overwrite") {
new.df <- orig.f
new.df[ ,dv.ls] <- t(bayesdata)
}
return(list(df=new.df,
gamma.hat=gamma.hat, delta.hat=delta.hat,
gamma.star=gamma.star, delta.star=delta.star,
gamma.bar=gamma.bar, t2=t2, a.prior=a.prior, b.prior=b.prior, batch=batch, mod=model,
stand.mean=stand.mean, stand.sd=sqrt(var.pooled)[,1])
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.