#' @include generics.R
#'
NULL
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Functions
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#' Perform canonical correlation analysis (CCA) on two dimension
#'
#' Assuming there are two matrix X (M genes by K cells) and Y (N loci by L cells), we want to
#' find the cell correspondences and correlated gene/loci module between them.
#' Traditional CCA could not handle this case because it requires that there should be at least one
#' dimension shared by two datasets.
#' The DCCA function aims to finding one transition matrix Z (M genes by L cells) to bridge X and Y.
#' More details can be seen in xx
#'
#' @param X First matrix (M genes by K cells)
#' @param Y Second matrix (N loci by L cells)
#' @param Z0 Transition matrix (M genes by L cells)
#' @param num.D Number of canonical vectors to calculate for pair (X, Z) [default 10]
#' @param num.E Number of canonical vectors to calculate for pair (Z, Y) [default 10]
#' @param num.iteration Maximal number of iteration [default 100]
#' @param tolerance Relative change ratio for ||Z||F during iteration [default 0.01]
#' @param save Save the temporay files [default FALSE]
#'
#' @return Returns the object with list:
#'
#' * `Z` - contains the estimated transition matrix (M genes by L cells)
#' * `u` - contains the canonical correlation vectors for X (K cells by num.D factor)
#' * `r` - contains the canonical correlation vectors for Z (L cells by num.D factor)
#' * `s` - contains the canonical correlation vectors for Z (M genes by num.E factor)
#' * `v` - contains the canonical correlation vectors for Z (N loci by num.E factor)
#'
#' @importFrom methods slot
#' @importFrom stats ave aggregate
#'
#' @export
#'
#' @author Jinzhuang Dou, \email{Dou1@mdanderson.org}
#' @seealso \code{\link{xx}} \code{\link{xx}}
#'
#' @examples
#' runDCCA(X=X, Y=Y, Z0 = Z0, num.D = 6, num.E = 6)
#'
runDCCA <- function(X=NULL, Z0=NULL, Y=NULL, num.D = 10, output=NULL,
num.E = 10, num.iteration = 100, tolerance = 0.01, save = FALSE){
# Matrix dimension check
start_time <- Sys.time()
message(paste(start_time, " Started!"))
preCheck(X=X, Z0=Z0, Y=Y, num.D = num.D, num.E = num.E, num.iteration = num.iteration)
current_time <- Sys.time()
INF <- 10^(10)
M <- 200
X_Ncell <- dim(X)[2]
X_Ngene <- dim(X)[1]
Y_Ncell <- dim(Y)[2]
Y_Nloci <- dim(Y)[1]
Z_Ncell <- dim(Z0)[2]
Z_Ngene <- dim(Z0)[1]
message(paste(current_time, " Dimension Check: X[",X_Ngene,"x",X_Ncell,"]",
" Y[",Y_Nloci,"x",Y_Ncell,"]",
" Z0[",Z_Ngene,"x",Z_Ncell,"]", sep=""))
# Pre calculate generalized inverse
message(paste(current_time, " Centering and scaling data matrix"))
#X <- ScaleData(object = X, features = rownames(X), do.scale=TRUE, do.center=TRUE, verbose = FALSE)
#X <- colScale(X)
#Z0 <- ScaleData(object = as.matrix(Z0), do.scale=TRUE, do.center=TRUE, verbose = FALSE)
#Z0 <- colScale(Z0)
#Y <- ScaleData(object = t(Y), features = rownames(t(Y)), do.scale=TRUE, do.center=TRUE, verbose = FALSE)
#Y <- colScale(Y)
#Y <- t(Y)
XX_SVD <- irlba(crossprod(x = X, y = X), M, fastpath = FALSE)
YY_SVD <- irlba(crossprod(x = t(Y), y = t(Y)), M, fastpath = FALSE)
#XX_SVD$u <- L2Norm(XX_SVD$u)
#XX_SVD$v <- L2Norm(XX_SVD$v)
#YY_SVD$u <- L2Norm(YY_SVD$u)
#YY_SVD$v <- L2Norm(YY_SVD$v)
pre <- Z0
cnt <- 0
delta <- INF
rd_delta <- c()
pb <- progress_bar$new(
format = " Iterating [:bar] :percent eta: :eta",
total = num.iteration, clear = FALSE, width= 60)
for(cnt in seq(1,num.iteration,1)){
if (any(!is.finite(Z0))) stop("infinite or missing values in 'Z0'")
# rownames(Z0) <- rownames(X)
#Z0 <- ScaleData(object = as.matrix(Z0), do.scale=TRUE, do.center=TRUE, verbose = FALSE)
#Z0 <- colScale(Z0)
out1 <- estZ(A=X, B=Z0, AA_SVD = XX_SVD, k = num.D)
tp <- t(as.matrix(out1$Z))
# rownames(tp) <- colnames(Y)
#Z0 <- ScaleData(object = tp , do.scale=TRUE, do.center=TRUE, verbose = FALSE)
#Z0 <- colScale(Z0)
#Z0 <- t(Z0)
if (any(!is.finite(Z0))) stop("infinite or missing values in 'Z0'")
out2 <- estZ(A=t(Y), B=t(Z0), AA_SVD = YY_SVD, k = num.E)
Z0 <- t(out2$Z)
delta <- norm(Z0-pre, type="F")/norm(pre)
pre <- Z0
pb$tick()
rd_delta <- c(rd_delta, delta)
print(delta)
if(save==TRUE){
out<-list("cell_u" = out1$coembeding$u,
"cell_r" = out1$coembeding$v,
"feature_s" = out2$coembeding$u,
"feature_v" = out2$coembeding$v,
"Z" = Z0,
"delta" = rd_delta)
saveRDS(out, file=paste(output,"/iteration",cnt,".rds",sep=""))
}
if(delta < tolerance){break}
}
message("\n")
current_time <- Sys.time()
if(delta >= tolerance){
message(paste(current_time, " The decomposition is not converged. The output may not be optimal.
You can increase num.iteration again."))
}
else{
message(paste(current_time, " Done! The decomposition is converged."))
}
rownames(out1$coembeding$u) <- colnames(X)
rownames(out1$coembeding$v) <- colnames(Y)
rownames(out2$coembeding$u) <- rownames(X)
rownames(out2$coembeding$v) <- rownames(Y)
rownames(Z0) <- rownames(X)
colnames(Z0) <- colnames(Y)
out<-list("cell_u" = out1$coembeding$u,
"cell_r" = out1$coembeding$v,
"feature_s" = out2$coembeding$u,
"feature_v" = out2$coembeding$v,
"Z" = Z0,
"delta" = rd_delta)
return(out)
}
crossprod_e <- function(x= X, y=Y){
tp <- crossprod(x = x, y=y)
diag(tp) <- diag(tp) + 0
return(tp)
}
estZ <- function(A = NULL, B = NULL, AA_SVD = NULL, k = k){
INF <- 10^(10)
AB_SVD <- irlba(crossprod_e(x = A, y = B), k)
BB_SVD <- irlba(crossprod_e(x = B, y = B), k)
ss_SVD <- irlba(crossprod_e(x = t(AB_SVD$v), y = t(AB_SVD$v)), k)
#AB_SVD$u <- L2Norm(AB_SVD$u)
#AB_SVD$v <- L2Norm(AB_SVD$v)
#BB_SVD$u <- L2Norm(BB_SVD$u)
#ss_SVD$u <- L2Norm(ss_SVD$u)
#BB_SVD$v <- L2Norm(BB_SVD$v)
#ss_SVD$v <- L2Norm(ss_SVD$v)
lst<-list()
lst[[1]]<-A
lst[[2]]<-AA_SVD$u
tp <- AA_SVD$d
tp[tp<=0] <- INF
tp <- 1/sqrt(tp)
lst[[3]]<-diag(tp) # update
lst[[4]]<-t(AA_SVD$v)
lst[[5]]<-AB_SVD$u
lst[[6]]<-t(AB_SVD$v)
lst[[7]]<-ss_SVD$u
tp <- ss_SVD$d
tp[tp!=0] <- 1/tp[tp!=0]
lst[[8]]<-diag(tp) # update
lst[[9]]<-t(ss_SVD$u)
lst[[10]]<-BB_SVD$u
tp<-BB_SVD$d
tp[tp<=0] <- INF
tp <- sqrt(tp)
lst[[11]]<-diag(tp) # update
lst[[12]]<-t(BB_SVD$u)
Z0 <- mulitCrossprod(lst)
out <- list()
out$Z <- Z0
out$coembeding <- AB_SVD
return(out)
}
feature_match <- function(dt1, dt2, band.width=10){
out <- cdist(dt1, dt2)
out <- as.matrix(out)
for( i in seq(1, dim(out)[2],1)){
out[,i] <- exp(0-band.width*out[,i]/sd(out[,i]))
out[,i] <- out[,i]/sum(out[,i])
}
out[out<10^(-7)] <- 0
out <- Matrix(out, sparse = TRUE)
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.