#---------------------------------------------------------------------------------------------------------
#ModulePrincComps, which computes the first principal component of each module.
#Also it provides the variance explained for the first 5 PCs of a module.
#It is based on the singular value decomposition.
#It takes as input datExpr for which the rows are samples and the columns are genes.
#Here is how you would use it
#PC1=ModulePrinComps2[datExpr,color1]
#Then PC1[[1]] provides a data frame with the first PC of each module
#PC1[[2]] provides a data frame where each column contains the percentages of
#the variance explained by the first 10 PCs by module.
#---------------------------------------------------------------------------------------------------------
# the output is a list and each element is a matrix whose number of columns equals the number of modules
# the first element of the list contains the 1st principal components of all modules, and
# the second element of the list contains the 2nd principal components of all modules, etc...
# the last element is the expained variations of top "no.pcs" PCs in each modules
#---------------------------------------------------------------------------------------------------------
require(impute)
ModuleHubProfileSingle = function(datexpr, no_pcs=10) {
ngenes = dim(datexpr)[1]
print("PC by ModuleHubProfileSingle .................. ")
corrlmatrix = cor( t(datexpr), use = "pairwise.complete.obs")
corrlmatrix = abs(corrlmatrix)
diag(corrlmatrix)<- 0
kin <- apply(corrlmatrix,2,sum, na.rm=TRUE)
orderK = order(-kin)
# select no.pcs genes which are ebv
step = as.integer(ngenes/no_pcs)
if(step<1){step=1; }
selIdx= seq(from=1,to=ngenes, by=step)
# mimic values from SVD
#
v = t(datexpr[selIdx[1:no_pcs], ])
d = rep(0, no_pcs)
ret = NULL
ret$v = v
ret$d = d
return(ret)
}
ModulePrinComps = function(datexpr,modules, min_modulesize=10) {
couleur = modules;
datEXpr = t(datExpr)
no.pcs=10
allmean = mean(datexpr, na.rm=T)
#modlevels= names(table(couleur)) #levels(factor(couleur))
modlevels <- names(couleur)
#the first no.pcs of elements in the list are PCs and the last one is the variations
listPCs = as.list(rep(NULL, no.pcs+1) )
for (i in c(1:no.pcs) ){
listPCs[[i]] = data.frame(matrix(0,nrow=dim(datexpr)[[1]],ncol= length(modlevels)))
colnames(listPCs[[i]]) <- modlevels
}
listPCs[[ no.pcs+1] ]= data.frame(matrix(0,nrow=no.pcs,ncol= length(modlevels))) #varexplained
colnames(listPCs[[ no.pcs+1] ]) <- modlevels
for(i in c(1:length(modlevels)) ){
#print(paste(i, modlevels[i]) )
modulename = modlevels[i]
#restrict1= as.character(couleur)== modulename
restrict1 = colnames(datexpr) %in% couleur[[i]]
restrict1= ifelse(is.na(restrict1), FALSE, restrict1)
if(modlevels[i]=="grey"){
next
}
# in the following, rows are genes and columns are samples
datModule=t(datexpr[, restrict1])
dim(datModule)
# check whether some samples have missing rate (missing value in column) >80%
naDat = is.na(datModule)
nasBySample= apply(naDat, 2, sum)
nacutoff = 0.6*dim(datModule)[1]
selSamples = nasBySample>=nacutoff
if(sum(selSamples)>0){
print("patch samples with missing rate >=0.8")
naSampleIdx = c(1:length(nasBySample))[selSamples]
for(x in naSampleIdx){
#print(paste("Sample Idx=", x) )
datModule[,x]=ifelse(is.na( datModule[,x]), allmean, datModule[,x])
}
}
if(sum(restrict1)<min_modulesize){
listPCs[[1] ][,i] = datModule[1,]
next
}
imputed=impute.knn(as.matrix(datModule)) #$data for R version < 2.0
datModule =imputed$data #=imputed for R < 2.9
datModule=t(scale(t(datModule)))
# use the hub's profuile as PC if svd doesn't converge
#svd1=svd(datModule)
svd1=tryCatch(svd(datModule), error=function(e) ModuleHubProfileSingle(datModule), no_pcs=no.pcs)
mtitle=paste("PCs of ", modulename," module", sep="")
no.samples = dim(datModule)[2]
actualpcs=min(dim(svd1$v)[2], no.pcs)
#cat(modulename, as.character(i), as.character(actualpcs), "\n")
#explained variation (%)
listPCs[[ no.pcs+1] ][,i]= (svd1$d[1:no.pcs])^2/sum(svd1$d^2)
# this is the first principal component, the ith column of the j-th element in the list
for (j in c(1:actualpcs) ){
#print(j)
pcj=svd1$v[,j]
# detect NAs
jnas = is.na(pcj)
if (sum(jnas)>0){
break;
}
signhj=sign(sum(cor(pcj, t(datModule))))
if( !(signhj == 0 | is.na(signhj)) ) pcj=signhj* pcj
listPCs[[j] ][,i] = pcj
}
}
PC.out = listPCs
PC.res <- data.frame()
l <- length(PC.out)
for (i in 1:(l-1))
{
df <- data.frame(type = rep(paste("PC",i,sep = ""),nrow(PC.out[[i]])),id = colnames(datExpr),
as.data.frame(PC.out[[i]]))
PC.res <- rbind.data.frame(PC.res,df)
rownames(PC.out[[i]]) <- colnames(datExpr)
rm(df)
}
PC.res <- rbind.data.frame(PC.res,
data.frame(type = rep("variance.explained",nrow(PC.out[[l]])),id = paste("PC",1:nrow(PC.out[[l]]),sep = ""),as.data.frame(PC.out[[l]])))
rownames(PC.out[[l]]) <- paste("PC",1:nrow(PC.out[[l]]),sep = "")
names(PC.out) <- c(paste("PC",1:(l-1),sep = ""),"variance.explained")
return(PC.res)
}
ModuleHubProfiles = function(datexpr, adjmatrix, couleur, min_modulesize=10, whichmodule=NULL) {
no.pcs=10
modlevels= names(table(couleur)) #levels(factor(couleur))
#the first no.pcs of elements in the list are PCs and the last one is the variations
listPCs = as.list(rep(NULL, no.pcs+1) )
for (i in c(1:no.pcs) ){
listPCs[[i]] = data.frame(matrix(0,nrow=dim(datexpr)[[1]],ncol= length(modlevels)))
colnames(listPCs[[i]]) <- modlevels
}
listPCs[[ no.pcs+1] ]= data.frame(matrix(0,nrow=no.pcs,ncol= length(modlevels))) #varexplained
colnames(listPCs[[ no.pcs+1] ]) <- modlevels
# take the profile of the most connected gene in each module as PC
#
colorI = as.character(colcode.reduced)
kin = computeModuleLinks(adjmatrix, couleur)
# find the hub in each module
#
kinIdxed= cbind(kin, c(1:length(couleur)), couleur)
orderK = order(-kin)
kinIdxed= kinIdxed[orderK, ]
orderK = order(kinIdxed[,3])
kinIdxed= kinIdxed[orderK, ]
hubIdx = rep(0, length(modlevels) )
for(z in c(1:length(modlevels)) ) {
isel = modlevels[z] == kinIdxed[,3]
ikinIdxed = kinIdxed[isel, ]
# extract hubs' profiles
#
listPCs[[1] ][,z] = datexpr[,as.integer(ikinIdxed [1,2])]
hubIdx[z] = ikinIdxed [1,2]
}
# extract hubs' profiles
#
#listPCs[[1]] = t(datexpr[,as.integer(hubIdx)])
names(listPCs[[1]]) <- modlevels
if(!is.null(whichmodule) ){
wsel = modlevels==whichmodule
widx = c(1:length(modlevels))[widx]
return(listPCs[[1] ][,wsel])
}
return(listPCs)
}
#-------------------------------------------------------------------------
#Function: compute whithin-module number of connections for each gene
#Input:
# adjMatrix ~ adjacency matrix
# colorcodeC ~ color codes (modules) for genes
#Output: in-module number of connections of each gene
#-------------------------------------------------------------------------
computeModuleLinks = function(adjMatrix, colorcodeC, isAdjacency=TRUE, normalized=FALSE, usegreymodule=F)
{
modnames= names( table(colorcodeC) )
no.genes = dim(adjMatrix)[1]
links = rep(0, no.genes)
idxseq = c(1:no.genes)
for (each in modnames ){
if((usegreymodule==F) & (as.character(each)=="grey" ) ){
next
}
whichmod = each==colorcodeC
module.size = sum(whichmod)
if (module.size==1){
next
}
modk <- apply(adjMatrix[whichmod,whichmod],2,sum, na.rm=TRUE)
if(!isAdjacency){
modk <- (module.size -modk)
}
#normalize against the module size
if(normalized==TRUE){
modk = modk/module.size
}
#indices of genes in the current module
idxmod = idxseq * whichmod
#put the links's to the buffer
links[idxmod] = modk
}
links
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.