BatchQC Report
In BatchQC: Batch Effects Quality Control Software

shinyInput <- getShinyInput()
batch <- shinyInput$batch
condition <- shinyInput$condition
report_option_vector <- shinyInput$report_option_vector
eval_cell_1 = (report_option_vector[1]=="1")

r if (eval_cell_1) 'Summary' r if (eval_cell_1) '=======' r if (eval_cell_1) '## Confounding' r if (eval_cell_1) '### Number of samples in each Batch and Condition'

catbatch <- function(x) {
    paste("Batch", x)
}
catcondition <- function(x) {
    paste("Condition", x)
}
counts <- table(shinyInput$condition, shinyInput$batch)
countsmatrix <- as.matrix(counts)
colnames(countsmatrix) <- sapply(colnames(countsmatrix), catbatch, 
    simplify = TRUE)
rownames(countsmatrix) <- sapply(rownames(countsmatrix), catcondition, 
    simplify = TRUE)
panderOptions("table.split.table", 120)  ## table split at 100 (default 80) characters in a line
pander(countsmatrix)

r if (eval_cell_1) '### Measures of confounding between Batch and Condition'

counts <- table(shinyInput$condition, shinyInput$batch)
rowsums <- apply(counts, 1, sum)
colsums <- apply(counts, 2, sum)
tablesum <- sum(rowsums)
expected <- matrix(0, nrow(counts), ncol(counts))
for (i in 1:nrow(counts)) {
    for (j in 1:ncol(counts)) {
        expected[i, j] = rowsums[i] * colsums[j]/tablesum
    }
}
chi <- sum((counts - expected)^2/expected)
mmin <- min(nrow(counts), ncol(counts))
confound1 <- sqrt(chi * mmin/((chi + tablesum) * (mmin - 1)))  
## Standardized Pearson Correlation Coefficient
confound2 <- sqrt(chi/(tablesum * (mmin - 1)))  ## Cramer's V
confound <- matrix(c(confound1, confound2), nrow = 1)
colnames(confound) <- c("Standardized Pearson Correlation Coefficient", 
    "Cramer's V")
rownames(confound) <- c(
"Confounding Coefficients (0=no confounding, 1=complete confounding)")
panderOptions("table.split.table", 120)  ## table split at 100 (default 80) characters in a line
pander(confound)

r if (eval_cell_1) '## Variation Analysis' r if (eval_cell_1) '### Variation explained by Batch and Condition'

batchqc_ev <- tryCatch({
batchqc_explained_variation(shinyInput$lcounts, 
    shinyInput$condition, shinyInput$batch)
}, error = function(err) {
    warning("Error in BatchQC Explained Variation: ",err)
    return(NULL)
})
if (is.null(batchqc_ev))  {
    eval_cell_1 = FALSE
} else  {
    boxplot(batchqc_ev$explained_variation, ylab = 
        "Percent Explained Variation", main = 
        "Percent of Variation Explained by Source")
}

panderOptions("table.split.table", 120)  ## table split at 100 (default 80) characters in a line
pander(apply(batchqc_ev$explained_variation, 2, summary))

r if (eval_cell_1) '## P-value Analysis' r if (eval_cell_1) '### Distribution of Batch and Condition Effect p-values Across Genes'

cond_ps <- batchqc_ev$cond_test$p
batch_ps <- batchqc_ev$batch_test$p
pvalue_table <- rbind(
    `Batch P-values` = c(summary(batch_ps), 
    `Ps<0.05` = mean(batch_ps <= 0.05)),
    `Condition P-values` = c(summary(cond_ps), 
    `Ps<0.05` = mean(cond_ps <= 0.05)))
pander(pvalue_table)

cond_ps <- batchqc_ev$cond_test$p
batch_ps <- batchqc_ev$batch_test$p
nf <- graphics::layout(mat = matrix(c(1, 2), 2, 1, byrow = TRUE), 
    height = c(1, 3))
# par(mar=c(3.1, 3.1, 1.1, 2.1))
par(mar = c(3, 3, 1, 2))
boxplot(batch_ps, horizontal = TRUE, outline = TRUE, ylim = c(0, 1), 
    frame = F, col = "green1")
hist(batch_ps, xlim = c(0, 1), col = "pink", main = "")
title("Distribution of Batch Effect p-values Across Genes")

cond_ps <- batchqc_ev$cond_test$p
nf <- graphics::layout(mat = matrix(c(1, 2), 2, 1, byrow = TRUE), 
    height = c(1, 3))
# par(mar=c(3.1, 3.1, 1.1, 2.1))
par(mar = c(3, 3, 1, 2))
boxplot(cond_ps, horizontal = TRUE, outline = TRUE, ylim = c(0, 1), 
    frame = F, col = "green1")
hist(cond_ps, xlim = c(0, 1), col = "pink", main = "")
title("Distribution of Condition Effect p-values Across Genes")

eval_cell_2 = (report_option_vector[2]=="1")

r if (eval_cell_2) 'Differential Expression' r if (eval_cell_2) '=======================' r if (eval_cell_2) '## Expression Plot' r if (eval_cell_2) 'Boxplots for all values for each of the samples and are colored by batch membership.'

boxplot(shinyInput$lcounts,col=as.numeric(as.factor(batch)),main="Sample Boxplots (colored by batch)",xlab="Samples")

r if (eval_cell_2) '## LIMMA'

pdata <- data.frame(shinyInput$batch, shinyInput$condition)
ncond <- nlevels(as.factor(shinyInput$condition))
nbatch <- nlevels(as.factor(shinyInput$batch))
if (ncond > 1)  {
    if (nbatch <= 1)  {
        mod_full <- model.matrix(~as.factor(shinyInput$condition), data = pdata)
    } else  {
        mod_full <- model.matrix(~as.factor(shinyInput$condition) + 
            ~as.factor(shinyInput$batch), data = pdata)
    }
    tryCatch({
        fit <- lmFit(shinyInput$lcounts, mod_full)
        fit2 <- eBayes(fit)
        limmaTable <- topTable(fit2, coef = 2:ncond, number=10)
        for (j in 2:ncond)  {
            colnames(limmaTable)[j-1] <- paste("Condition: ", 
            levels(as.factor(shinyInput$condition))[j], " (logFC)", sep='')
        }
        panderOptions("table.split.table", 120)  
        ## table split at 100 (default 80) characters in a line
        pander(limmaTable)
    }, error = function(err) {
        warning("Error in BatchQC Limma Table: ",err)
    })

}

eval_cell_3 = (report_option_vector[3]=="1")

r if (eval_cell_3) 'Median Correlations' r if (eval_cell_3) '===================' r if (eval_cell_3) 'This plot helps identify outlying samples.'

batchqc_corscatter(shinyInput$lcounts, batch=batch, mod=mod)

eval_cell_4 = (report_option_vector[4]=="1")

r if (eval_cell_4) 'Heatmaps' r if (eval_cell_4) '========' r if (eval_cell_4) '## Heatmap' r if (eval_cell_4) 'This is a heatmap of the given data matrix showing the batch effects and variations with different conditions.'

batchqc_heatmap(shinyInput$lcounts, batch=batch, mod=mod)

r if (eval_cell_4) '## Sample Correlations' r if (eval_cell_4) 'This is a heatmap of the correlation between samples.'

batchqc_correlation(shinyInput$lcounts, batch=batch, mod=mod)

eval_cell_5 = (report_option_vector[5]=="1")

r if (eval_cell_5) 'Circular Dendrogram' r if (eval_cell_5) '===================' r if (eval_cell_5) 'This is a Circular Dendrogram of the given data matrix colored by batch to show the batch effects.'

batchqc_circosplot(shinyInput$lcounts, shinyInput$batch, "complete")

eval_cell_6 = (report_option_vector[6]=="1")

r if (eval_cell_6) 'PCA: Principal Component Analysis' r if (eval_cell_6) '=================================' r if (eval_cell_6) '## PCA' r if (eval_cell_6) 'This is a plot of the top two principal components colored by batch to show the batch effects.'

pca <- tryCatch({
    batchqc_pca(shinyInput$lcounts, batch=batch, mod=mod)
}, error = function(err) {
    warning("Error in BatchQC PCA: ",err)
    return(NULL)
})
if (is.null(pca))  {
    eval_cell_6 = FALSE
} else  {
    shinyInput <- getShinyInput()
    shinyInput <- c(shinyInput, list("pc"=data.frame(pca$x), "vars"=pca$sdev^2))
    setShinyInput(shinyInput)
}

r if (eval_cell_6) '## Explained Variation'

pc <- shinyInput$pc
pcs <- t(pc)
explained_variation <- tryCatch({
    explained_variation <- batchqc_pc_explained_variation(pcs, 
        shinyInput$vars, condition, batch)
}, error = function(err) {
    warning("Error in BatchQC PC Explained Variation: ",err)
    return(NULL)
})
if (!is.null(explained_variation))  {
    panderOptions("table.split.table", 240)  
    ## table split at 240 (default 80) characters in a line
    pander(explained_variation)
}

eval_cell_7 = (report_option_vector[7]=="1")

r if (eval_cell_7) 'Shape' r if (eval_cell_7) '=====' r if (eval_cell_7) 'This is a heatmap plot showing the variation of gene expression mean, variance, skewness and kurtosis between samples grouped by batch to see the batch effects variation'

lcounts_adj <- tryCatch({
    batchQC_condition_adjusted(shinyInput$lcounts, batch, condition)
}, error = function(err) {
    warning("Error in BatchQC Condition adjusted data: ",err)
    return(shinyInput$lcounts)
})
bf <- as.factor(shinyInput$batch)
pval <- batchQC_shapeVariation(lcounts_adj, batch, plot = TRUE, groupCol = 
    rainbow(nlevels(bf))[bf])
cat(paste("Note: Sample-wise p-value is calculated for the",
    "variation across samples on the measure",
    "across genes. Gene-wise p-value is calculated for the",
    "variation of each gene between batches",
    "on the measure across each batch. If the data is", 
    "quantum normalized, then the Sample-wise measure across",
    "genes is same for all samples and Gene-wise p-value",
    "is a good measure.", 
    sep=" "))

eval_cell_8 = (report_option_vector[8]=="1")

r if (eval_cell_8) 'Combat Plots' r if (eval_cell_8) '============' r if (eval_cell_8) 'This is a plot showing whether parametric or non-parameteric prior is appropriate for this data. It also shows the Kolmogorov-Smirnov test comparing the parametric and non-parameteric prior distribution.'

kstest <- tryCatch({
    combatPlot(shinyInput$lcounts, batch=shinyInput$batch, mod=mod)
}, error = function(err) {
    warning("Error in BatchQC ComBat Plot: ",err)
    return(NULL)
})
if (!is.null(kstest))  {
    shinyInput <- getShinyInput()
    delta.hat <- shinyInput$delta.hat
    gamma.hat <- shinyInput$gamma.hat
    gamma.bar <- shinyInput$gamma.bar
    a.prior <- shinyInput$a.prior
    b.prior <- shinyInput$b.prior
    ksout <- ks.test(gamma.hat[1, ], "pnorm", 
        gamma.bar[1], sqrt(shinyInput$t2[1]))  
        # two-sided, exact
    summarytext <- 
    "Batch mean distribution across genes: Normal vs Empirical distribution"
    summarytext <- paste(summarytext, "Two-sided Kolmogorov-Smirnov test", 
        sep = "\n")
    summarytext <- paste(summarytext, "Selected Batch: ", sep = "\n")
    summarytext <- paste(summarytext, 1, sep = "")
    summarytext <- paste(summarytext, "Statistic D = ", sep = "\n")
    summarytext <- paste(summarytext, signif(ksout$statistic, 4), sep = "")
    summarytext <- paste(summarytext, "p-value = ", sep = "\n")
    summarytext <- paste(summarytext, signif(ksout$p.value, 4), sep = "")

    invgam <- 1/rgamma(ncol(delta.hat), a.prior[1], 
        b.prior[1])
    ksvarout <- ks.test(delta.hat[1, ], invgam)  
        # two-sided, exact
    summarytext <- paste(summarytext, 
        "\n\n\nBatch Variance distribution across genes: ",
        "Inverse Gamma vs Empirical distribution", 
        sep = "")
    summarytext <- paste(summarytext, "Two-sided Kolmogorov-Smirnov test", 
        sep = "\n")
    summarytext <- paste(summarytext, "Selected Batch: ", sep = "\n")
    summarytext <- paste(summarytext, 1, sep = "")
    summarytext <- paste(summarytext, "Statistic D = ", sep = "\n")
    summarytext <- paste(summarytext, signif(ksvarout$statistic, 4), 
        sep = "")
    summarytext <- paste(summarytext, "p-value = ", sep = "\n")
    summarytext <- paste(summarytext, signif(ksvarout$p.value, 4), sep = "")

    cat(summarytext)
    cat(paste("Note: The non-parametric version of ComBat",
        "takes much longer time to run and we recommend it",
        "only when the shape of the non-parametric curve",
        "widely differs such as a bimodal or highly skewed",
        "distribution. Otherwise, the difference in batch", 
        "adjustment is very negligible and parametric version",
        "is recommended even if p-value of KS test above is",
        "significant.", sep=" "))
}

eval_cell_9 = (report_option_vector[9]=="1")

r if (eval_cell_9) 'SVA' r if (eval_cell_9) '===' r if (eval_cell_6) '## Summary'

nsample <- dim(shinyInput$lcounts)[2]
sample <- 1:nsample
pdata <- data.frame(sample, condition)
ncond <- nlevels(as.factor(condition))
if (ncond <= 1)  {
    modmatrix = matrix(rep(1, ncol(shinyInput$lcounts)), ncol = 1)
} else  {
    modmatrix = model.matrix(~as.factor(condition), data = pdata)
}
n.sv <- tryCatch({
    batchQC_num.sv(shinyInput$lcounts, modmatrix)
}, error = function(err) {
    warning("Error in BatchQC num.sv surrogate variables compution: ",err)
    return(NULL)
})
if (!is.null(n.sv))  {
cat(paste("Number of Surrogate Variables found in the given data:", n.sv))
}