###################################################################################
##Note: this script contains function for correlation calcuation with snowfall
## package in R for parallell computing.
##Author: Chuang Ma
##Date: 2012-02-16
##################################################################################
#if( !require(biwt)) install.packages("biwt")
#require(biwt)
#if( !require(parmigene)) install.package("parmigene")
#library(parmigene)
#########################################################################
##compute Gini correlation by using the rank informaiton of x and the
## value informaiton of y
########################################################################
#' @export
onegcc <- function(x, y) {
#rank function, get y[m] and y(m) with the rank information of x (first line)
getrank <- function(datamatrix) {
if( dim(datamatrix)[1] != 2 ) {
stop("Error: the row num of datamatrix must be 2")
}
#use the order information of X
OrderIndex <- order( datamatrix[1,], decreasing = FALSE )
SortGenePair <- datamatrix[, OrderIndex]
Sort2By1 <- SortGenePair[2,]
return( list(Sort2By1 = Sort2By1,
Sort2By2 = sort(datamatrix[2,], decreasing = FALSE )
) )
}#end getrank
##compute gcc with weight vector and y[m] and y(m)
gcc <- function( weightvec, vectsort, vectselfsort) {
Sum1 <- sum(weightvec*vectsort)
Sum2 <- sum(weightvec*vectselfsort)
if( Sum2 == 0 ) {
cat("\n", x, "\n", y, "\n")
cat("Warning: the Denominator is ZRRO, the value of one variable is consistent.")
gcccor <- 0
}else {
gcccor <- Sum1/Sum2
}
return(gcccor)
}
######################################################
##for gini correlation
##generate weight vector
Length <- length(x)
Wt <- t(2*seq(1, Length, by = 1) - Length - 1)
##for gcc.rankx
GenePairXY <- t(matrix( c(x,y), ncol = 2))
SortYlist <- getrank(GenePairXY)
gcc.rankx <- gcc(Wt, SortYlist$Sort2By1, SortYlist$Sort2By2)
return(gcc.rankx)
}
##########################################################################
##get final correlation and p-value for GCC
##input gcccor is the output of allcor function for GCC
##########################################################################
#' @export
gcc.corfinal <- function( gcccor ) {
##if gcccor has p-value informaiton, select correlation with p-value
if( is.numeric(gcccor$gcc.rankx.pvalue) & is.numeric(gcccor$gcc.ranky.pvalue) ) {
fpvalue <- gcccor$gcc.rankx.pvalue
fgcc <- gcccor$gcc.rankx
if( gcccor$gcc.ranky.pvalue < fpvalue ) {
fpvalue <- gcccor$gcc.ranky.pvalue
fgcc <- gcccor$gcc.ranky
}
}else { ##no p-value, we selected the gcc with absolute max vlaue
fpvalue <- NA
x <- c(gcccor$gcc.rankx, gcccor$gcc.ranky)
fgcc <- x[ which( abs(x) == max(abs(x)) ) ]
if( length(fgcc) > 1 ) { #check length
# cat("Warnning: the correlation coefficients generated by GCC method are the same after abs() operation.",
# gcccor$gcc.rankx, gcccor$gcc.ranky,
# "In current version of Rgcc, only the first one is selected...")
fgcc <- fgcc[1]
}
}
return( list(gcc.fcor = fgcc, gcc.fpvalue = fpvalue))
}
#' @export
cor.pair <- function( idxvec,
GEMatrix,
rowORcol = c("row", "col"),
cormethod = c("GCC", "PCC", "SCC", "KCC", "BiWt"),
pernum = 0,
sigmethod = c("two.sided", "one.sided") )
{
#########################################################################
##compute Gini correlation by using the rank informaiton of x and the
## value informaiton of y
########################################################################
#' @export
onegcc <- function(x, y) {
#rank function, get y[m] and y(m) with the rank information of x (first line)
getrank <- function(datamatrix) {
if( dim(datamatrix)[1] != 2 ) {
stop("Error: the row num of datamatrix must be 2")
}
#use the order information of X
OrderIndex <- order( datamatrix[1,], decreasing = FALSE )
SortGenePair <- datamatrix[, OrderIndex]
Sort2By1 <- SortGenePair[2,]
return( list(Sort2By1 = Sort2By1,
Sort2By2 = sort(datamatrix[2,], decreasing = FALSE )
) )
}#end getrank
##compute gcc with weight vector and y[m] and y(m)
#' @export
gcc <- function( weightvec, vectsort, vectselfsort) {
Sum1 <- sum(weightvec*vectsort)
Sum2 <- sum(weightvec*vectselfsort)
if( Sum2 == 0 ) {
cat("\n", x, "\n", y, "\n")
cat("Warning: the Denominator is ZRRO, the value of one variable is consistent.")
gcccor <- 0
}else {
gcccor <- Sum1/Sum2
}
return(gcccor)
}
######################################################
##for gini correlation
##generate weight vector
Length <- length(x)
Wt <- t(2*seq(1, Length, by = 1) - Length - 1)
##for gcc.rankx
GenePairXY <- t(matrix( c(x,y), ncol = 2))
SortYlist <- getrank(GenePairXY)
gcc.rankx <- gcc(Wt, SortYlist$Sort2By1, SortYlist$Sort2By2)
return(gcc.rankx)
}
if(!is.vector(idxvec) | length(idxvec) != 2) {
stop("Error: idxvec must be a vector with two elements indicating the indexs(rows) in GEMatrix")
}
# if( class(GEMatrix) != "matrix" ) {
# stop("Error: GEMatrix should be a numeric data matrix")
# }
if( rowORcol == "row" ) {
x1 <- GEMatrix[idxvec[1],]
y1 <- GEMatrix[idxvec[2],]
}else if(rowORcol == "col") {
x1 <- GEMatrix[,idxvec[1]]
y1 <- GEMatrix[,idxvec[2]]
}else {
stop("Error: rowORcol must be \"row\" or \"col\"")
}
##function for all considered correlation methods
getcor <- function(g1, g2, cormethod ) {
if( cormethod == "PCC") { return(cor.test(g1, g2, method="pearson")$estimate) }
else if( cormethod == "SCC" ) { return(cor.test(g1, g2, method="spearman")$estimate) }
else if( cormethod == "KCC" ) { return(cor.test(g1, g2, method="kendall")$estimate) }
else if( cormethod == "BiWt" ){ return(biwt.cor(t(matrix(c(g1,g2), ncol=2)), output="vector")[1]) }
else if( cormethod == "GCC" ){
return(list( gcc.rankx = onegcc(g1,g2), gcc.ranky = onegcc(g2,g1) ) )
}
}#end getcor
##get pvalue for permutation test
getpvalue <- function( percorvec, pernum, realcor, sigmethod ) {
pvalue <- length(which(percorvec >= realcor))/pernum
if( pvalue == 0 ) pvalue <- 1.0/pernum
if( pvalue > 0.5 ) pvalue <- 1.0 - pvalue
if( sigmethod == "two.sided" ) {
pvalue <- 2*pvalue
}
return(pvalue)
}#end getpvalue
#check cormethod
if( length(cormethod) > 1 ) {
stop("Error: length of cormethod must be of length 1")
}
#check vector
if( !is.vector(x1) || !is.vector(y1) ) {
stop("Error: input two vectors for gcc.corpair")
}
#check numeric
if( !is.numeric(x1) || !is.numeric(y1) ) {
stop("Error: x should be numeric")
}
#check NA
if( length(which(is.na(x1) == TRUE)) > 0 || length(which(is.na(y1) == TRUE)) > 0 ){
stop("Error: There are Na(s) in x")
}
#check length
if( length(x1) != length(y1) ) {
stop("Error: the lengths of each row in x are different.\n")
}
realcor <- getcor(x1, y1, cormethod)
##check pernum for permutation or output
if( pernum <= 0 ) {
if( cormethod == "GCC") {
return( list(gcc.rankx=realcor$gcc.rankx, gcc.ranky=realcor$gcc.ranky, gcc.rankx.pvalue = NA, gcc.ranky.pvalue = NA) )
}else {
return( list(cor = realcor, pvalue = NA))
}
}else {
##generate matrix for permuted correlation
pGCCMatrix <- matrix(0, nrow = pernum, ncol = 2)
colnames(pGCCMatrix) <- c("gcc.rankx", "gcc.ranky")
rownames(pGCCMatrix) <- paste("permut", seq(1,pernum, by=1), sep="")
GenePairXY <- t(matrix( c(x1, y1), ncol = 2))
pGenePairXY <- GenePairXY
Length <- length(x1)
for( i in 1:pernum ) {
##get system time for seed and then generate random index
curtime <- format(Sys.time(), "%H:%M:%OS4")
XXX <- unlist(strsplit(curtime, ":"))
curtimeidx <- (as.numeric(XXX[1])*3600 + as.numeric(XXX[2])*60 + as.numeric(XXX[3]))*10000
set.seed( curtimeidx )
TT = sort(runif(Length),index.return=TRUE)$ix
pGenePairXY[1,] <- GenePairXY[1,TT]
if( cormethod == "GCC" ) {
cortmp <- getcor( pGenePairXY[1,], pGenePairXY[2,], cormethod )
pGCCMatrix[i,1] <- cortmp$gcc.rankx
pGCCMatrix[i,2] <- cortmp$gcc.ranky
}else {
pGCCMatrix[i,1] <- getcor( pGenePairXY[1,], pGenePairXY[2,], cormethod )
}
}#end for i
#compute p-value
if( cormethod == "GCC" ) {
return( list( gcc.rankx = realcor$gcc.rankx,
gcc.ranky = realcor$gcc.ranky,
gcc.rankx.pvalue = getpvalue(pGCCMatrix[,1], pernum, realcor$gcc.rankx, sigmethod),
gcc.ranky.pvalue = getpvalue(pGCCMatrix[,2], pernum, realcor$gcc.ranky, sigmethod)) )
}else {
return( list (cor = realcor,
pvalue = getpvalue(pGCCMatrix[,1], pernum, realcor, sigmethod)) )
}
}
}#end allcor
##############################################################################
##Calcluate correlations for variables in a matrix with parallel computing
##x: numeric matrix
##asym: If ture, output gccx and gccy; or else, output max gcc(or gcc with low p value)
##style: "all.pairs", "pairs.between", "adjacent.pairs", "one.pair"
##var1.id
##var2.id
##pernum
##sigmethod
##Date: 2012-02-16
##############################################################################
#' @export
cor.matrix <- function( GEMatrix,
cpus = 1,
cormethod = c("GCC", "PCC", "SCC", "KCC", "BiWt"),
style = c("all.pairs", "pairs.between", "adjacent.pairs", "one.pair"),
var1.id = NA,
var2.id = NA,
pernum = 0,
sigmethod = c("two.sided", "one.sided"),
output = c("matrix", "paired")
) {
if( cpus > 1 ) {
require(snowfall)
}
#check cormethod
if( length( cormethod ) > 1 ) {
cat("Warning: one correlation method should be specified. Default:GCC")
}
#check style
if( length(style) > 1 ) {
cat( "Warning: one style should be specified. Default: all.pairs")
}
#check sigmethod
if( pernum > 0 & length( sigmethod ) > 1 ) {
sigmethod = "two.sided"
}
if( pernum == 0 ) {
sigmethod <- "two.sided"
}
#check whether matrix
if( !is.matrix( GEMatrix) ) {
stop("Error: GEMatrix in cor.matrix function is not matrix")
}
if( !is.numeric( GEMatrix) ){
stop("Error: GEMatrix is not numeric")
}
if( length(rownames(GEMatrix)) == 0 ) { #no rownames
rownames(GEMatrix) <- seq(1,dim(GEMatrix)[1], by=1)
}
VariableNum <- nrow(GEMatrix)
SampleSize <- ncol(GEMatrix)
if( VariableNum <= 1 || SampleSize <= 1 ){
stop("Error:the number of variable is less than 2, or the number of observation is less than 2 ")
}
if( style == "one.pair" ) {
if( length(var1.id) != 1 || length(var2.id) != 1 || is.na(var1.id) == TRUE || is.na(var2.id) == TRUE) {
stop("Error: Not define the var1.id or var1.id")
}
}
if( style == "adjacent.pairs") {
var1.id <- seq(1, (VariableNum-1), by=1)
var2.id <- var1.id + 1
}
#for task matrix
if( style == "one.pair" || style == "adjacent.pairs") {
if( length(var1.id) != length(var2.id) ) {
stop("Error: var1.id and var2.id should be vectors with the same length")
}
taskmatrix <- matrix(c(var1.id, var2.id), ncol = 2)
}#end if style
if( style == "pairs.between" ) {
if( length( which(is.na(var1.id) == TRUE) ) > 0 | length( which(is.na(var1.id) == TRUE) ) > 0 ){
stop("Error: no variable IDs are given")
}
if( length( which((var1.id != var2.id) == TRUE ) ) > 0 ) {
stop("Error: var1.id should be the same with var2.id for the pairs.between style")
}
if( length(which(is.numeric(var1.id) == FALSE)) > 0 | length(which(is.numeric(var2.id) == FALSE)) > 0 ){
stop("Error:var1.id and var2.id should be numeric vector")
}
}
if( style == "all.pairs" ) {
var1.id <- seq(1, dim(GEMatrix)[1], by=1)
var2.id <- var1.id
}
if( style == "pairs.between" || style == "all.pairs") {
CurLen <- length(var1.id)
taskmatrix <- matrix(0, nrow = length(var1.id)*(length(var1.id)+1)/2, ncol = 2)
kk = 0
for( i in 1:length(var1.id)) {
j <- 1
while( j <= i ) {
kk <- kk + 1
taskmatrix[kk,] <- c(i,j)
j <- j + 1
}#end while j
}#end for i
}#end style
##implement apply
if( cpus == 1 | cormethod == "BiWt") {
results <- apply(taskmatrix, 1, cor.pair, GEMatrix = GEMatrix, rowORcol = "row", cormethod = cormethod, pernum = pernum, sigmethod = sigmethod)
}else {
sfInit(parallel=TRUE, cpus=cpus)
print(sprintf('%s cpus to be used', sfCpus()))
# if( cormethod == "GCC") sfExport('onegcc')
# if( cormethod == "BiWt") sfExport('biwt.cor')
results <- sfApply(taskmatrix, 1, cor.pair, GEMatrix = GEMatrix, rowORcol = "row", cormethod = cormethod, pernum = pernum, sigmethod = sigmethod)
sfStop()
}
##get final results
if( output == "paired") {
kk <- 0
corpvalueMatrix <- matrix(NA, nrow = dim(taskmatrix)[1], ncol = 4)
for( i in 1:dim(taskmatrix)[1] ) {
if( taskmatrix[i,1] == taskmatrix[i,2] ) next
kk <- kk + 1
corpvalueMatrix[kk,1:2] <- rownames(GEMatrix)[taskmatrix[i,]]
if( cormethod == "GCC" ) {
fGCC <- gcc.corfinal(results[i][[1]])
corpvalueMatrix[kk,3] <- fGCC$gcc.fcor
corpvalueMatrix[kk,4] <- fGCC$gcc.fpvalue
}else {
corpvalueMatrix[kk,3] <- results[i][[1]]$cor
corpvalueMatrix[kk,4] <- results[i][[1]]$pvalue
}
}#end for i
return( corpvalueMatrix[1:kk,] )
}else {
##get final results
UniqueRow <- sort(unique(taskmatrix[,1]))
UniqueCol <- sort(unique(taskmatrix[,2]))
corMatrix <- matrix(0, nrow = length(UniqueRow), ncol = length(UniqueCol))
rownames(corMatrix) <- rownames(GEMatrix)[UniqueRow]
colnames(corMatrix) <- rownames(GEMatrix)[UniqueCol]
pvalueMatrix <- corMatrix
pvalueMatrix[] <- NA
for( i in 1:dim(taskmatrix)[1] ) {
rowidx <- which(UniqueRow == taskmatrix[i,1])
colidx <- which(UniqueCol == taskmatrix[i,2])
if( cormethod == "GCC" ) {
fGCC <- gcc.corfinal(results[i][[1]])
corMatrix[rowidx,colidx] <- fGCC$gcc.fcor
pvalueMatrix[rowidx,colidx] <- fGCC$gcc.fpvalue
}else {
corMatrix[rowidx,colidx] <- results[i][[1]]$cor
pvalueMatrix[rowidx,colidx] <- results[i][[1]]$pvalue
}
if( style == "pairs.between" | style == "all.pairs" ) {
corMatrix[colidx, rowidx] <- corMatrix[rowidx,colidx]
pvalueMatrix[colidx,rowidx] <- pvalueMatrix[rowidx,colidx]
}
}#end for i
return( list(corMatrix = corMatrix, pvalueMatrix = pvalueMatrix) )
}#end else
}#end function
#############################################################################
##This function plots HeatMap with different similarity measures (1-CorCoef)
##Here CorCoef could be GCC (Gini correlation coefficient),
##PCC (Pearson product-moment correlation coefficient),
##SCC (Spearman's rank correlation coefficient),
##KCC (Kendall tau correlation coefficient)
##and BiWt(correlation estimates based on Tukey's biweight M-estimator)
##To set other parameters, please ref HeatMap.2 function in gplots package.
#############################################################################
#' @export
gcc.heatmap <- function(x,
cpus = 1,
## correlation method
method = c("GCC", "PCC", "SCC", "KCC", "BiWt", "MI", "MINE", "ED"),
## similarity method
distancemethod = c("Raw", "Abs", "Sqr"),
#hclustfun = hclust,
clustermethod = c("complete", "average", "median", "centroid", "mcquitty", "single", "ward"),
rowhcdata = NULL,
colhcdata = NULL,
keynote = "FPKM",
## dendrogram control
symm = FALSE,
Rowv = TRUE,
Colv= if(symm)"Rowv" else TRUE,
dendrogram = c("both","row","column","none"),
## data scaling
scale = c("none","row", "column"),
na.rm=TRUE,
## image plot
revC = identical(Colv, "Rowv"),
add.expr,
## mapping data to colors
breaks = 16,
quanbreaks = TRUE,
symbreaks=min(x < 0, na.rm=TRUE) || scale!="none",
## colors
colrange= c("green", "black", "red"),
## block sepration
colsep,
rowsep,
sepcolor="white",
sepwidth=c(0.05,0.05),
## cell labeling
cellnote,
notecex=1.0,
notecol="cyan",
na.color=par("bg"),
## level trace
trace=c("none", "column","row","both"),
tracecol="cyan",
hline=median(breaks),
vline=median(breaks),
linecol=tracecol,
## Row/Column Labeling
margins = c(5, 5),
ColSideColors,
RowSideColors,
cexRow = 0.2 + 1/log10(dim(x)[1]),
cexCol = 0.2 + 1/log10(dim(x)[2]),
labRow = NULL,
labCol = NULL,
## color key + density info
key = TRUE,
keysize = 0.65,
density.info=c("none","histogram","density"),
denscol=tracecol,
symkey = min(x < 0, na.rm=TRUE) || symbreaks,
densadj = 0.25,
## plot labels
main = NULL,
xlab = NULL,
ylab = NULL,
## plot layout
lmat = NULL,
lhei = NULL,
lwid = NULL,
## extras
...
)
{
cat("GE matrix start to be clustered:", dim(x), "\n")
scale01 <- function(x, low = min(x), high = max(x)) { x <- (x - low)/(high - low); x }
retval <- list()
scale <- if(symm & missing(scale)) { "none" }
else { match.arg(scale) }
dendrogram <- match.arg(dendrogram)
trace <- match.arg(trace)
density.info <- match.arg(density.info)
if (length(colrange) == 1 & is.character(colrange)) {
colrange <- get(colrange, mode = "function")
col <- colrange
}
if (!missing(breaks) & (scale != "none"))
warning("Using scale=\"row\" or scale=\"column\" when breaks are",
"specified can produce unpredictable results.", "Please consider using only one or the other.")
if (is.null(Rowv) | is.na(Rowv)) { Rowv <- FALSE }
if (is.null(Colv) | is.na(Colv)) { Colv <- FALSE
}else if (Colv == "Rowv" & !isTRUE(Rowv)) { Colv <- FALSE }
if (length(di <- dim(x)) != 2 || !is.numeric(x)) {stop("`x' must be a numeric matrix") }
nr <- di[1]
nc <- di[2]
if(nr <= 1 | nc <= 1) {stop("`x' must have at least 2 rows and 2 columns")}
cat("nr = ", nr, "\n")
cat("nc = ", nc, "\n")
if(!is.numeric(margins) | length(margins) != 2) {stop("`margins' must be a numeric vector of length 2")}
if(missing(cellnote)) { cellnote <- matrix("", ncol = ncol(x), nrow = nrow(x)) }
if(!inherits(Rowv, "dendrogram")) {
if(((!isTRUE(Rowv)) | (is.null(Rowv))) & (dendrogram %in% c("both", "row"))) {
if (is.logical(Colv) & (Colv)) { dendrogram <- "column" }
else { dedrogram <- "none" }
warning("Discrepancy: Rowv is FALSE, while dendrogram is `", dendrogram, "'. Omitting row dendogram.")
}
}
if(!inherits(Colv, "dendrogram")) {
if(((!isTRUE(Colv)) | (is.null(Colv))) & (dendrogram %in% c("both", "column"))) {
if (is.logical(Rowv) & (Rowv)) {dendrogram <- "row" }
else { dendrogram <- "none" }
warning("Discrepancy: Colv is FALSE, while dendrogram is `", dendrogram, "'. Omitting column dendogram.")
}
}
if (inherits(Rowv, "dendrogram")) {
ddr <- Rowv
rowInd <- order.dendrogram(ddr)
}
else if (is.integer(Rowv)) {
if( missing(rowhcdata) | is.null(rowhcdata) ) {
hcr <- gcc.hclust( x, cpus = cpus, method = method, distancemethod = distancemethod, clustermethod = clustermethod)
}else {
hcr <- rowhcdata
}
ddr <- as.dendrogram(hcr$hc)
ddr <- reorder(ddr, Rowv)
rowInd <- order.dendrogram(ddr)
if (nr != length(rowInd)) {stop("row dendrogram ordering gave index of wrong length") }
}
else if (isTRUE(Rowv)) {
Rowv <- rowMeans(x, na.rm = na.rm)
if( missing(rowhcdata) | is.null(rowhcdata) ) {
hcr <- gcc.hclust( x, cpus = cpus, method = method, distancemethod = distancemethod, clustermethod = clustermethod)
}else {
hcr <- rowhcdata
}
ddr <- as.dendrogram(hcr$hc)
ddr <- reorder(ddr, Rowv)
rowInd <- order.dendrogram(ddr)
if (nr != length(rowInd)) {stop("row dendrogram ordering gave index of wrong length") }
}
else {
rowInd <- nr:1
}
if (inherits(Colv, "dendrogram")) {
ddc <- Colv
colInd <- order.dendrogram(ddc)
}else if (identical(Colv, "Rowv")) {
if (nr != nc) { stop("Colv = \"Rowv\" but nrow(x) != ncol(x)") }
if (exists("ddr")) {
ddc <- ddr
colInd <- order.dendrogram(ddc)
}
else colInd <- rowInd
}
else if (is.integer(Colv)) {
aa <- x
if( !symm ) aa <- t(x)
if( missing(colhcdata) | is.null(colhcdata) ) {
hcc <- gcc.hclust( aa, cpus = cpus, method = method, distancemethod = distancemethod, clustermethod = clustermethod)
}else {
hcc <- colhcdata
}
ddc <- as.dendrogram(hcc$hc)
ddc <- reorder(ddc, Colv)
colInd <- order.dendrogram(ddc)
if (nc != length(colInd)) { stop("column dendrogram ordering gave index of wrong length") }
}
else if (isTRUE(Colv)) {
Colv <- colMeans(x, na.rm = na.rm)
aa <- x
if( !symm ) aa <- t(x)
if( missing(colhcdata) | is.null(colhcdata) ) {
hcc <- gcc.hclust( aa, cpus = cpus, method = method, distancemethod = distancemethod, clustermethod = clustermethod)
}else {
hcc <- colhcdata
}
ddc <- as.dendrogram(hcc$hc)
ddc <- reorder(ddc, Colv)
colInd <- order.dendrogram(ddc)
if (nc != length(colInd)) {stop("column dendrogram ordering gave index of wrong length") }
}
else {
colInd <- 1:nc
}
retval$rowInd <- rowInd
retval$colInd <- colInd
retval$call <- match.call()
x <- x[rowInd, colInd]
x.unscaled <- x
cellnote <- cellnote[rowInd, colInd]
if (is.null(labRow)) {
labRow <- if (is.null(rownames(x)))
(1:nr)[rowInd]
else rownames(x)
}else { labRow <- labRow[rowInd] }
if (is.null(labCol)) {
labCol <- if (is.null(colnames(x)))
(1:nc)[colInd]
else colnames(x)
}else { labCol <- labCol[colInd] }
if (scale == "row") {
retval$rowMeans <- rm <- rowMeans(x, na.rm = na.rm)
x <- sweep(x, 1, rm)
retval$rowSDs <- sx <- apply(x, 1, sd, na.rm = na.rm)
x <- sweep(x, 1, sx, "/")
}
else if (scale == "column") {
retval$colMeans <- rm <- colMeans(x, na.rm = na.rm)
x <- sweep(x, 2, rm)
retval$colSDs <- sx <- apply(x, 2, sd, na.rm = na.rm)
x <- sweep(x, 2, sx, "/")
}
##set breaks for colorkey and heat map
if (missing(breaks) | is.null(breaks) | length(breaks) < 1) {
if (missing(colrange) || is.function(colrange))
breaks <- 20
else if( is.vector(colrange) == TRUE & length(colrange) > 3) { #pre-define colors
breaks <- length(col) + 1
}else {
breaks <- 20
}
}
if (length(breaks) == 1) { #if already
if( quanbreaks == TRUE ) {
breaks <- quantile( unique(c(x)), probs = seq(0, 1, length = breaks), na.rm = TRUE)
}else {
if (!symbreaks)
breaks <- seq(min(x, na.rm = na.rm), max(x, na.rm = na.rm), length = breaks)
else {
extreme <- max(abs(x), na.rm = TRUE)
breaks <- seq(-extreme, extreme, length = breaks)
}
}
}
nbr <- length(breaks)
ncol <- length(breaks) - 1
if (class(colrange) == "function") {
col <- colrange(ncol)
}else if( is.vector(colrange) == TRUE & length(colrange) >= 2 ) {
col <- colorRampPalette(colrange)(nbr - 1) #for three col
}else{
col <- colorRampPalette(c("green", "black", "red"))(nbr - 1) #for three col
}
min.breaks <- min(breaks)
max.breaks <- max(breaks)
x[x < min.breaks] <- min.breaks
x[x > max.breaks] <- max.breaks
if (missing(lhei) | is.null(lhei))
lhei <- c(keysize, 4)
if (missing(lwid) | is.null(lwid))
lwid <- c(keysize, 4)
if (missing(lmat) | is.null(lmat)) {
lmat <- rbind(4:3, 2:1)
if (!missing(ColSideColors)) {
if (!is.character(ColSideColors) | length(ColSideColors) != nc) {
stop("'ColSideColors' must be a character vector of length ncol(x)")
}
lmat <- rbind(lmat[1, ] + 1, c(NA, 1), lmat[2, ] + 1)
lhei <- c(lhei[1], 0.2, lhei[2])
}
if (!missing(RowSideColors)) {
if (!is.character(RowSideColors) | length(RowSideColors) != nr) {
stop("'RowSideColors' must be a character vector of length nrow(x)")
}
lmat <- cbind(lmat[, 1] + 1, c(rep(NA, nrow(lmat) - 1), 1), lmat[, 2] + 1)
lwid <- c(lwid[1], 0.2, lwid[2])
}
lmat[is.na(lmat)] <- 0
}
if (length(lhei) != nrow(lmat))
stop("lhei must have length = nrow(lmat) = ", nrow(lmat))
if (length(lwid) != ncol(lmat))
stop("lwid must have length = ncol(lmat) =", ncol(lmat))
op <- par(no.readonly = TRUE)
on.exit(par(op))
layout(lmat, widths = lwid, heights = lhei, respect = FALSE)
if (!missing(RowSideColors)) {
par(mar = c(margins[1], 1, 1, 0.5))
image(rbind(1:nr), col = RowSideColors[rowInd], axes = FALSE)
}
if (!missing(ColSideColors)) {
par(mar = c(0.5, 0, 0, margins[2]))
image(cbind(1:nc), col = ColSideColors[colInd], axes = FALSE)
}
par(mar = c(margins[1], 0, 0, margins[2]))
x <- t(x)
cellnote <- t(cellnote)
if (revC) {
iy <- nr:1
if (exists("ddr")) { ddr <- rev(ddr) }
x <- x[, iy]
cellnote <- cellnote[, iy]
}
else { iy <- 1:nr }
image(1:nc, 1:nr, x, xlim = 0.5 + c(0, nc), ylim = 0.5 + c(0, nr),
axes = FALSE, xlab = "" , ylab = "", col = col, breaks = breaks, ...)
retval$carpet <- x
if (exists("ddr"))
retval$rowDendrogram <- ddr
if (exists("ddc"))
retval$colDendrogram <- ddc
retval$breaks <- breaks
retval$col <- col
if (!invalid(na.color) & any(is.na(x))) {
mmat <- ifelse(is.na(x), 1, NA)
image(1:nc, 1:nr, mmat, axes = FALSE, xlab = "", ylab = "", col = na.color, add = TRUE)
}
axis(1, 1:nc, labels = labCol, las = 2, line = -0.5, tick = 0, cex.axis = cexCol)
if (!is.null(xlab))
mtext(xlab, side = 1, line = margins[1] - 1.25)
axis(4, iy, labels = labRow, las = 2, line = -0.5, tick = 0, cex.axis = cexRow)
if (!is.null(ylab))
mtext(ylab, side = 4, line = margins[2] - 1.25)
if (!missing(add.expr))
eval(substitute(add.expr))
if (!missing(colsep))
for (csep in colsep) rect(xleft = csep + 0.5, ybottom = rep(0, length(csep)),
xright = csep + 0.5 + sepwidth[1], ytop = rep(ncol(x) + 1, csep), lty = 1, lwd = 1,
col = sepcolor, border = sepcolor)
if (!missing(rowsep))
for (rsep in rowsep) rect(xleft = 0, ybottom = (ncol(x) + 1 - rsep) - 0.5,
xright = nrow(x) + 1, ytop = (ncol(x) + 1 - rsep) - 0.5 - sepwidth[2], lty = 1, lwd = 1,
col = sepcolor, border = sepcolor)
min.scale <- min(breaks)
max.scale <- max(breaks)
x.scaled <- scale01(t(x), min.scale, max.scale)
if (trace %in% c("both", "column")) {
retval$vline <- vline
vline.vals <- scale01(vline, min.scale, max.scale)
for (i in colInd) {
if (!is.null(vline)) {
abline(v = i - 0.5 + vline.vals, col = linecol,
lty = 2)
}
xv <- rep(i, nrow(x.scaled)) + x.scaled[, i] - 0.5
xv <- c(xv[1], xv)
yv <- 1:length(xv) - 0.5
lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s")
}
}
if (trace %in% c("both", "row")) {
retval$hline <- hline
hline.vals <- scale01(hline, min.scale, max.scale)
for (i in rowInd) {
if (!is.null(hline)) {
abline(h = i + hline, col = linecol, lty = 2)
}
yv <- rep(i, ncol(x.scaled)) + x.scaled[i, ] - 0.5
yv <- rev(c(yv[1], yv))
xv <- length(yv):1 - 0.5
lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s")
}
}
if (!missing(cellnote))
text(x = c(row(cellnote)), y = c(col(cellnote)), labels = c(cellnote), col = notecol, cex = notecex)
par(mar = c(margins[1], 0, 0, 0))
if (dendrogram %in% c("both", "row")) {
plot(ddr, horiz = TRUE, axes = FALSE, yaxs = "i", leaflab = "none")
}
else plot.new()
par(mar = c(0, 0, if (!is.null(main)) 5 else 0, margins[2]))
if (dendrogram %in% c("both", "column")) {
plot(ddc, axes = FALSE, xaxs = "i", leaflab = "none")
}
else plot.new()
if (!is.null(main))
title(main, cex.main = 1.5 * op[["cex.main"]])
#for key
if (key) {
# par(mar = c(5, 4, 2, 1), cex = 0.75)
tmpbreaks <- breaks
if (symkey) {
max.raw <- max(abs(c(x, breaks)), na.rm = TRUE)
min.raw <- -max.raw
tmpbreaks[1] <- -max(abs(x), na.rm = TRUE)
tmpbreaks[length(tmpbreaks)] <- max(abs(x), na.rm = TRUE)
}
else {
min.raw <- min(x, na.rm = TRUE)
max.raw <- max(x, na.rm = TRUE)
}
z <- seq(min.raw, max.raw, length = length(col))
# image(z = matrix(z, ncol = 1), col = col, breaks = tmpbreaks, xaxt = "n", yaxt = "n")
image(z = matrix(z, ncol = 1), col = col, xaxt = "n", yaxt = "n", xlab = "", ylab = "")
par(usr = c(0, 1, 0, 1))
lv <- pretty(breaks)
xv <- scale01(as.numeric(lv), min.raw, max.raw)
axis(1, at = xv, labels = lv)
if(scale == "row") { mtext(side = 1, paste(keynote, "(Row Z-Score)", sep=""), line = 2)
}else if(scale == "column") {mtext(side = 1, paste(keynote, "(Column Z-Score)", sep=""), line = 2)
}else { mtext(side = 1, keynote, line = 2) }
if (density.info == "density") {
dens <- density(x, adjust = densadj, na.rm = TRUE)
omit <- dens$x < min(breaks) | dens$x > max(breaks)
dens$x <- dens$x[-omit]
dens$y <- dens$y[-omit]
dens$x <- scale01(dens$x, min.raw, max.raw)
lines(dens$x, dens$y/max(dens$y) * 0.95, col = denscol,
lwd = 1)
axis(2, at = pretty(dens$y)/max(dens$y) * 0.95, pretty(dens$y))
# title("Color Key\nand Density Plot")
par(cex = 0.5)
mtext(side = 2, "Density", line = 2)
}
else if (density.info == "histogram") {
h <- hist(x, plot = FALSE, breaks = breaks)
hx <- scale01(breaks, min.raw, max.raw)
hy <- c(h$counts, h$counts[length(h$counts)])
lines(hx, hy/max(hy) * 0.95, lwd = 1, type = "s",
col = denscol)
axis(2, at = pretty(hy)/max(hy) * 0.95, pretty(hy))
# title("Color Key\nand Histogram")
par(cex = 0.5)
mtext(side = 2, "Count", line = 2)
}
else title("")
}
else plot.new()
retval$colorTable <- data.frame(low = retval$breaks[-length(retval$breaks)],
high = retval$breaks[-1], color = retval$col)
invisible(retval)
return( list(retval = retval, hcr = hcr, hcc = hcc) )
}
##check whether x is a data matrix and k for KNN-based MI esimator.
.check.matrix <- function(x, k, name) {
if ((!is.matrix(x) && !is.data.frame(x)) || nrow(x) < 2) {
stop(paste(name, "must be a multi-row matrix or a data.frame"))
} else if (ncol(x) <= k) {
stop(paste(name, " has too few columns (must be > ", k, ")", sep=""))
}
}
##check the variable with multiple elements.
.check.variable <- function(var, name, candidates) {
if( is.vector(var) ){
vartmp = var[1]
}
if( length(vartmp) == 0 | is.null(var) == TRUE ) {
stop( paste(name, "must have a value with the candicate parameters."))
}
if( length( which( candidates == vartmp ) ) == 0 ){
stop( paste(name, "must have a value with the candicate parameters.") )
}
vartmp
}
#correlations for gene pairs of all genes
.cor_all <- function(xs, rowidx = NULL, colidx = NULL, corMethod = "GCC", cpus = 1, saveType = "matrix", backingpath = NULL, backingfile = "adj_mat", descriptorfile = "adj_desc" ) {
h <- nrow(xs)
w <- ncol(xs)
k <- 3
noise <- 0.0
subrownum <- 0
subcolnum <- 0
if( (length(rowidx) != h) | (length(colidx) != w) ) {
subrownum <- length(rowidx)
subcolnum <- length(colidx)
}
corIndex <- c(1,2,3,4, 5)
names(corIndex) <- c("GCC", "PCC", "SCC", "KCC", "ED")
curcorIndex <- corIndex[corMethod]
res <- NULL
xsix <- NULL
Rownames <- NULL
Colnames <- NULL
#all gene paris considered
if( (length(rowidx) == h) & (length(colidx) == h) ) {
Rownames <- rownames(xs)
Colnames <- Rownames
if( corMethod == "GCC" || corMethod == "SCC" ) {
xsix <- apply( xs, 1, function(x) sort( sort(x,index.return=TRUE)$ix, index.return=TRUE)$ix )
res <- .C("c_cor_all", as.integer(curcorIndex), as.double(t(xs)), as.integer(xsix), as.integer(h), as.integer(w), as.integer(k), as.double(noise), as.integer(cpus), res = double(h*h), PACKAGE = "rsgcc", DUP = TRUE)$res
}else if( corMethod == "PCC" || corMethod == "KCC" || corMethod == "ED" ) {
res <- .C("c_cor_all", as.integer(curcorIndex), as.double(t(xs)), as.integer(xsix), as.integer(h), as.integer(w), as.integer(k), as.double(noise), as.integer(cpus), res = double(h*h), PACKAGE = "rsgcc", DUP = TRUE)$res
}else{
stop("Error: undefined cor method).\n")
}
}else { #part gene paris considered
Rownames <- rownames(xs)[rowidx]
Colnames <- rownames(xs)[colidx]
if( corMethod == "GCC" || corMethod == "SCC" ) {
xsix <- apply( xs, 1, function(x) sort( sort(x,index.return=TRUE)$ix, index.return=TRUE)$ix )
res <- .C("c_cor_subset", as.integer(curcorIndex), as.double(t(xs)), as.integer(xsix), as.integer(h), as.integer(w), as.integer(k), as.double(noise), as.integer(cpus), as.integer(rowidx), as.integer(colidx), as.integer(subrownum), as.integer(subcolnum), res = double(subrownum*subcolnum), PACKAGE = "rsgcc", DUP = TRUE)$res
}else if( corMethod == "PCC" || corMethod == "KCC" || corMethod == "ED" ) {
res <- .C("c_cor_subset", as.integer(curcorIndex), as.double(t(xs)), as.integer(xsix), as.integer(h), as.integer(w), as.integer(k), as.double(noise), as.integer(cpus), as.integer(rowidx), as.integer(colidx), as.integer(subrownum), as.integer(subcolnum), res = double(subrownum*subcolnum), PACKAGE = "rsgcc", DUP = TRUE)$res
}else{
stop("Error: undefined cor method).\n")
}
}
m <- t( matrix(res, nrow=length(Colnames)) )
rownames(m) <- Rownames
colnames(m) <- Colnames
if( saveType == "bigmatrix" ) {
options(bigmemory.allow.dimnames=TRUE)
if( is.null(backingpath) )
backingpath <- getwd()
m <- as.big.matrix(m, type = "double", separated=FALSE, backingfile= backingfile, backingpath= backingpath, descriptorfile = descriptorfile, shared = TRUE)
}
m
}
#############################################################################################
#generate adjacency matrix from a gene expression data
#Modified on 2012-11-28. two parameters (genes.row, genes.col) were added for generating a correlation matrix with length(genes.row) X length(genes.col).
#' @export
adjacencymatrix <- function(mat, genes.row = NULL, genes.col = NULL, method = c("GCC", "PCC", "SCC", "KCC", "BiWt", "MI", "MINE", "ED"), k = 3, cpus = 1, saveType = "matrix", backingpath = NULL, backingfile = "adj_mat", descriptorfile = "adj_desc", ... ) {
call <- match.call()
.check.matrix(mat, k, "xs")
method <- .check.variable(method, "method", c("GCC", "PCC", "SCC", "KCC", "BiWt", "MI", "MINE", "ED") )
if (method == "MI" && k < 2)
stop("k must be >= 2.")
geneNames <- rownames(mat)
if( is.null(geneNames) )
geneNames <- c(1:nrow(mat))
##for BiWt
if( method == "BiWt" ) {
m <- cor.matrix(mat, cpus = cpus, cormethod= method, style= "all.pairs", pernum= 0, sigmethod= "two.sided", output = "matrix")$corMatrix
rownames(m) <- rownames(mat)
colnames(m) <- rownames(mat)
if( saveType == "bigmatrix" ) {
options(bigmemory.allow.dimnames=TRUE)
if( is.null(backingpath) )
backingpath <- getwd()
m <- as.big.matrix(m, type = "double", separated=FALSE, backingfile= backingfile, backingpath= backingpath, descriptorfile = descriptorfile, shared = TRUE)
}#end if bigmatrix
return(m)
}
##for mine
if( method == "MINE" ) {
m <- mine(t(mat))$MIC
rownames(m) <- rownames(mat)
colnames(m) <- rownames(mat)
if( saveType == "bigmatrix" ) {
options(bigmemory.allow.dimnames=TRUE)
if( is.null(backingpath) )
backingpath <- getwd()
m <- as.big.matrix(m, type = "double", separated=FALSE, backingfile= backingfile, backingpath= backingpath, descriptorfile = descriptorfile, shared = TRUE)
}#end if bigmatrix
return(m)
}
#for other methods
Rownames <- geneNames
Colnames <- geneNames
row.idx <- c(1:nrow(mat))
col.idx <- c(1:nrow(mat))
if( !is.null(genes.row) ) {
row.idx <- match( genes.row, geneNames)
if( length( which(is.na(row.idx)) ) > 0 ) {
stop("Error: some gene names in genes.row are not found in rownames(mat).\n")
}
Rownames <- rownames(mat)[row.idx]
}
if( !is.null(genes.col) ) {
col.idx <- match( genes.col, geneNames)
if( length( which(is.na(col.idx)) ) > 0 ) {
stop("Error: some gene names in genes.col are not found in rownames(mat).\n")
}
Colnames <- rownames(mat)[col.idx]
}
#for knnmi
m <- NULL
if( method == "MI" ){
if( is.null(genes.row) | is.null(genes.col) ) {
m <- knnmi.cross( mat[row.idx,], mat[col.idx,], k, noise=1e-12 )
}else {
m <- knnmi.all(mat, k, noise=1e-12)
}
rownames(m) <- Rownames
colnames(m) <- Colnames
}else{
m <-.cor_all(xs = mat, rowidx = row.idx, colidx = col.idx, corMethod = method, cpus = cpus, saveType = saveType, backingpath = backingpath, backingfile = backingfile, descriptorfile = descriptorfile)
}
m
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.