Nothing
#' Adjust the distance matrix by quantile normalization for data with batch effect
#'
#'
#' @description This function applies quantile normalization to the distance
#' matrix (dissimilarity matrix) and returns the corrected distance matrix.
#' @details The input is classified purely by its shape: when
#' \code{nrow(dat) != ncol(dat)} it is treated as a p*n measurement matrix and
#' converted to a 1 - correlation matrix between its columns; when it is square
#' it is used as the distance matrix as-is. A measurement matrix with p equal
#' to n is therefore treated as a distance matrix.
#'
#' For a measurement matrix, \code{standardize} takes precedence over
#' \code{logdat}: if \code{standardize} is \code{TRUE} the package's
#' standardization helper is used and \code{logdat} is not consulted; otherwise
#' the correlation is computed on \code{log(dat + 1)} when \code{logdat} is
#' \code{TRUE} and on \code{dat} itself when it is \code{FALSE}.
#'
#' With \code{method = "row/column"} the normalization step is repeated until
#' the change between two successive matrices is at most \code{tol} or
#' \code{max} iterations have been performed. At least one iteration is always
#' run when \code{max >= 1}. Progress and convergence status are printed to the
#' console.
#'
#' An error is raised when \code{method} is not one of the two supported
#' values, when \code{dat} is not two-dimensional, or when the change between
#' two iterations evaluates to \code{NA}.
#' @param dat The original p*n batch effect data with n subjects and p RNA-seq measurements or the n by n distance matrix.
#' @param batch The vector of length n indicating which batch the subjects belong to.
#' @param method Method for the quantile normalization. There are two options: "row/column" (default) and "vectorize".
#' @param cor_method Method to calculate the correlation matrix, can be 'spearman'(default), 'pearson' or 'kendall'.
#' @param tol The tolerance for the iterative method "row/column", which is the Euclidean distance of the vectorized two dissimilarity matrices before and after each iteration. Defaults to 1e-2.
#' @param max Maximum number of iterations if the tolerance is not reached. Defaults to 50.
#' @param logdat Whether to apply the log transformation \code{log(dat + 1)} to the data before computing correlations. Defaults to \code{TRUE}; not consulted when \code{standardize} is \code{TRUE}.
#' @param standardize Whether conducting standardization [(dat - mean)/sqrt(var)] to data or not. Defaults to \code{FALSE}.
#' @return Returns the corrected 1-correlation matrix between subjects.
#' @author Teng Fei. Email: tfei@emory.edu
#' @references Fei et al (2018), Mitigating the adverse impact of batch effects in sample pattern detection, Bioinformatics, <https://doi.org/10.1093/bioinformatics/bty117>.
#' @export
#' @examples
#'
#' library(pheatmap) #drawing heatmap
#'
#' data("ENCODE") #load the ENCODE data
#'
#' #Before correction, the subjects are clustered by species
#' pheatmap(cor(ENCODE))
#'
#' #Assigning the batches based on species
#' batches <- c(rep(1,13),rep(2,13))
#'
#' #QuantNorm correction
#' corrected.distance.matrix <- QuantNorm(ENCODE,batches,method='row/column', cor_method='pearson',
#' logdat=FALSE, standardize = TRUE, tol=1e-4)
#' pheatmap(1-corrected.distance.matrix)
QuantNorm <- function (dat, batch, method = "row/column", cor_method = 'spearman', tol = 1e-2, max = 50, logdat = TRUE,
standardize = FALSE)
{
  # Validate up front: an unsupported method used to be reported only after the
  # (potentially expensive) correlation step, and the uncorrected matrix was
  # then returned as if it were the result.
  valid_methods <- c("row/column", "vectorize")
  if (!is.character(method) || length(method) != 1L ||
      !method %in% valid_methods) {
    stop("method must be 'row/column' or 'vectorize'.", call. = FALSE)
  }
  # nrow()/ncol() return NULL for dimensionless input, which would otherwise
  # surface as a cryptic "argument is of length zero" in the shape test below.
  if (length(dim(dat)) != 2L) {
    stop("dat must be a two-dimensional matrix or data frame.", call. = FALSE)
  }

  # A non-square input is a p*n measurement matrix; a square one is taken to
  # already be the n*n distance matrix.
  if (nrow(dat) != ncol(dat)) {
    cat(paste('Input data is a', nrow(dat), 'by', ncol(dat),
              'count matrix.\n', sep = ' '))
    if (standardize) {
      # Standardization wins over logdat; the helper receives cor_method.
      ccc <- standardization(dat, batch, method = cor_method)
    } else if (!logdat) {
      ccc <- 1 - stats::cor(dat, method = cor_method)
    } else {
      # +1 keeps zero counts finite under the log transform.
      ccc <- 1 - stats::cor(log(dat + 1), method = cor_method)
    }
  } else {
    cat(paste('Input data is a', nrow(dat), 'by', ncol(dat),
              'distance matrix.\n', sep = ' '))
    ccc <- dat
  }

  # Single-pass variant: no iteration or convergence reporting.
  if (method == "vectorize") {
    return(qnorm2(ccc, batch))
  }

  # Iterative "row/column" variant. The convergence test happens *after* each
  # update, so a large tolerance can no longer skip normalization entirely
  # (previously any tol >= 10 reported convergence after 0 iterations).
  delta <- Inf
  iter <- 0
  converged <- FALSE
  while (iter < max) {
    previous <- ccc
    ccc <- qnorm1plus(ccc, batch)
    # Euclidean distance between the vectorized matrices of two iterations.
    delta <- sqrt(sum((as.vector(ccc) - as.vector(previous))^2))
    iter <- iter + 1
    if (is.na(delta)) {
      stop("The change between two iterations is NA; ",
           "check the input for missing or constant values.", call. = FALSE)
    }
    if (delta <= tol) {
      converged <- TRUE
      break
    }
  }

  if (converged) {
    cat(paste("Algorithm converged after", iter, "iterations. \n"))
  } else {
    cat(paste("The algorithm did not converge after", max,
              "iteration. \n The difference between the last two iteration is",
              delta, ".\n"))
  }
  ccc
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.