Nothing
#' Illustration of partition density
#'
#' Calculate the average density of all resulting modules from a partition. The
#' density of each module is defined as the average adjacency of the module
#' genes.
#'
#' @param ADJ gene similarity matrix
#' @param PartitionSet vector indicates the partition label for genes
#'
#' @return partition density, defined as average density of all modules
#'
#' @references Langfelder, Peter, and Steve Horvath. "WGCNA: an R package for
#' weighted correlation network analysis." BMC bioinformatics 9.1 (2008): 1.
#'
#' @author Dong Li, \email{dxl466@cs.bham.ac.uk}
#' @keywords density
#'
#' @examples
#' data(synthetic)
#' ADJ1=abs(cor(datExpr1,use="p"))^10
#' dissADJ=1-ADJ1
#' hierADJ=hclust(as.dist(dissADJ), method="average" )
#' groups <- cutree(hierADJ, h = 0.8)
#' pDensity <- PartitionDensity(ADJ1,groups)
#' @export
#'
PartitionDensity <- function(ADJ, PartitionSet){
labels <- unique(PartitionSet)
numP <- length(labels)
pdensity <- 0
for (i in seq_len(numP)){
idx = which(PartitionSet == labels[i])
if (length(idx) == 1){
pdensity <- pdensity + 0
}else{
Aq = ADJ[idx,idx]
sum = sum(Aq)
nrow = dim(Aq)[1]
pdensity <- pdensity + sum*(sum-nrow)/(nrow*nrow-nrow)
}
}
pdensity <- pdensity*2/sum(ADJ)
pdensity
}
#' Illustration of modularity density
#'
#' Calculate the average modularity of a partition. The modularity of each
#' module is defined from a natural generalization of unweighted case.
#'
#' @param ADJ gene similarity matrix
#' @param PartitionSet vector indicates the partition label for genes
#'
#' @return partition modularity, defined as average modularity of all modules
#'
#' @references Newman, Mark EJ. "Analysis of weighted networks." Physical
#' review E 70.5 (2004): 056131.
#'
#' @author Dong Li, \email{dxl466@cs.bham.ac.uk}
#' @keywords modularity
#'
#' @examples
#' data(synthetic)
#' ADJ1=abs(cor(datExpr1,use="p"))^10
#' dissADJ=1-ADJ1
#' hierADJ=hclust(as.dist(dissADJ), method="average" )
#' groups <- cutree(hierADJ, h = 0.8)
#' pDensity <- PartitionModularity(ADJ1,groups)
#'
#' @export
#'
PartitionModularity <- function(ADJ, PartitionSet){
labels <- unique(PartitionSet)
numP <- length(labels)
m = sum(ADJ)
K = colSums(ADJ)
pModularity <- 0
for (i in seq_len(numP)){
idx = which(PartitionSet == labels[i])
if (length(idx) == 1){
pModularity <- pModularity + 0
}else{
Aq = ADJ[idx,idx]
deg = K[idx]
pModularity = pModularity + sum(Aq)- sum(deg*deg)/m
}
}
pModularity <- pModularity/m
pModularity
}
#' Modules detection by hierarchical clustering
#'
#' Module detection based on the optimal cutting height of dendrogram, which is
#' selected to make the average density or modularity of resulting partition
#' maximal. The clustering and visulization function are from WGCNA.
#'
#' @param datExpr gene expression profile, rows are samples and columns genes
#' @param foldername where to store the clusters
#' @param indicatename normally a specific tag of condition
#' @param cutmethod cutting the dendrogram based on maximal average Density
#' or Modularity
#' @param power the power parameter of WGCNA, W_{ij}=|cor(x_i,x_j)|^power
#'
#' @return The number of clusters
#'
#' @references Langfelder, Peter, and Steve Horvath. "WGCNA: an R package for
#' weighted correlation network analysis." BMC bioinformatics 9.1 (2008): 1.
#'
#' @author Dong Li, \email{dxl466@cs.bham.ac.uk}
#' @keywords cutting dendrogram
#' @seealso \code{\link{PartitionDensity}}
#' @seealso \code{\link{PartitionModularity}}
#'
#' @import WGCNA
#' @import igraph
#' @importFrom dynamicTreeCut cutreeDynamic
#' @examples
#' data(synthetic)
#' ResultFolder = 'ForSynthetic' # where middle files are stored
#' CuttingCriterion = 'Density' # could be Density or Modularity
#' indicator1 = 'X' # indicator for data profile 1
#' indicator2 = 'Y' # indicator for data profile 2
#' specificTheta = 0.1 #threshold to define condition specific modules
#' conservedTheta = 0.1#threshold to define conserved modules
#' intModules1 <- WeightedModulePartitionHierarchical(datExpr1,ResultFolder,
#' indicator1,CuttingCriterion)
#' #mymodule <- getPartition(ResultFolder)
#' #randIndex(table(mymodule,truemodule),adjust=F)
#' @export
#'
WeightedModulePartitionHierarchical <- function(datExpr,foldername,indicatename,
cutmethod=c('Density','Modularity'), power=10){
dir.create(file.path('./', foldername), showWarnings = FALSE)
ADJ1=abs(cor(datExpr,use="p"))^power
dissADJ=1-ADJ1
hierADJ=hclust(as.dist(dissADJ), method="average" )
#NumCutHeights = 27440 for DaphIX, too much, just change
#NumCutHeights <- length(hierADJ$height)
cutHeights <- seq(0.05,1,by=0.05)
NumCutHeights <- length(cutHeights)
pDensity <- numeric(length <- NumCutHeights)
maxpDensity <- 0
maxHeight <- 0
for (i in 1:NumCutHeights) {
#groups <- cutree(hierADJ, h = hierADJ$height[i]) # cut tree into 5
groups <- cutree(hierADJ, h = cutHeights[i])
if (cutmethod == 'Density'){
pDensity[i] <- PartitionDensity(ADJ1,groups)
}else{
pDensity[i] <- PartitionModularity(ADJ1,groups)
}
if (pDensity[i] > maxpDensity){
maxpDensity <- pDensity[i]
maxHeight <- cutHeights[i]
}
}
# We like large modules, so we set the minimum module size relatively high:
minModuleSize = 30;
# Module identification using dynamic tree cut:
dynamicMods = cutreeDynamic(dendro = hierADJ, distM = dissADJ, cutHeight =
maxHeight,deepSplit = 2, pamRespectsDendro = FALSE,
minClusterSize = minModuleSize)
dynamicColors = labels2colors(dynamicMods)
intModules = table(dynamicColors)
#######Visualization#############
#pdf(paste(foldername,"/Partitions_",indicatename,".pdf",sep=""),width = 11,
# height = 8)
png(paste(foldername,"/Partitions_",indicatename,".png",sep=""))
marAll = c(1, 5, 3, 1)
layout(matrix(c(1,2,3,0), 2, 2, byrow = TRUE), widths=c(0.8,0.2),
heights=c(0.8,0.2))
par(mar = c(0, marAll[2], marAll[3], marAll[4]))
plot(hierADJ, labels = FALSE, xlab="", sub="", ylim=c(0,1),hang = -1,
axes=FALSE,main = "Gene hierarchical clustering dendrogram")
abline(h = maxHeight, col = 'red')
axis(side = 2, at = seq(1,0,-0.2),labels =seq(1,0,-0.2))
par(mar = c(0, 1, marAll[3], marAll[4]),mgp = c(0, 1, 0))
plot(pDensity, cutHeights, type = 'n', ylim=c(0,1), axes=FALSE,ylab = "")
title(paste('Average',cutmethod), font.main = 1, cex.main = 1,line = 0)
axis(side = 1, at = seq(0,(max(pDensity)+0.1),0.1),
labels = seq(0,(max(pDensity)+0.1),0.1))
axis(side = 2, at = seq(1,0,-0.2),labels =seq(1,0,-0.2))
lines(pDensity, cutHeights, lwd = 2, col = 'blue')
abline(h = maxHeight, col = 'red')
par(mar = c(marAll[1], marAll[2], 0, marAll[4]))
plotColorUnderTree(hierADJ, dynamicColors, groupLabels = NULL,
rowText = NULL, rowLabels = 'Clusters')
dev.off()
for(J in 1:length(intModules)){
idx <- which(dynamicColors == names(intModules)[J])
DenseGenes = colnames(datExpr)[idx]
densegenefile <- paste(foldername,"/DenseModuleGene_",
indicatename,"_",J,".txt",sep="")
write.table(idx,file = paste(foldername,'/DenseModuleGeneID_',indicatename,'_',J,'.txt',sep=''),
quote = FALSE, row.names = FALSE, col.names = FALSE)
write.table(DenseGenes,densegenefile,sep = "\n",col.names = FALSE,
row.names = FALSE,quote = FALSE)
}
return (length(intModules))
}
#' Modules detection by spectral clustering
#'
#' Module detection based on the spectral clustering algorithm, which mainly
#' solve the eigendecomposition on Laplacian matrix
#'
#' @param datExpr gene expression profile, rows are samples and columns genes
#' @param foldername where to store the clusters
#' @param indicatename normally a specific tag of condition
#' @param GeneNames normally the gene official names to replace the colnames of datExpr
#' @param power the power parameter of WGCNA, W_{ij}=|cor(x_i,x_j)|^power
#' @param nn the number of nearest neighbor, used to construct the affinity matrix
#' @param k the number of clusters(modules)
#'
#' @return None
#'
#' @references Von Luxburg, Ulrike. "A tutorial on spectral clustering."
#' Statistics and computing 17.4 (2007): 395-416.
#'
#' @author Dong Li, \email{dxl466@cs.bham.ac.uk}
#' @keywords cutting dendrogram
#'
#'
#' @import igraph
#' @import cluster
#' @examples
#' data(synthetic)
#' ResultFolder <- 'ForSynthetic' # where middle files are stored
#' indicator <- 'X' # indicator for data profile 1
#' GeneNames <- colnames(datExpr1)
#' WeightedModulePartitionSpectral(datExpr1,ResultFolder,indicator,
#' GeneNames,k=5)
#' truemodule <- c(rep(1,100),rep(2,100),rep(3,100),rep(4,100),rep(5,100))
#' #mymodule <- getPartition(ResultFolder)
#' #randIndex(table(mymodule,truemodule),adjust=F)
#' @export
#'
WeightedModulePartitionSpectral <- function(datExpr, foldername, indicatename,
GeneNames, power=6, nn=10, k=2){
dir.create(file.path('./', foldername), showWarnings = FALSE)
ADJ1=abs(cor(datExpr,use="p"))^power
W = TOMsimilarity(ADJ1) # similarity matrix
A = make.affinity(W,nn)
d <- apply(A, 1, sum)
L <- diag(d)-A # unnormalized version
#bottleneck
L <- diag(d^-0.5)%*%L%*% diag(d^-0.5) # normalized version
evL <- eigen(L,symmetric=TRUE) # evL$values is decreasing sorted when symmetric=TRUE
#bottleneck
Z <- evL$vectors[,(ncol(evL$vectors)-k+1):ncol(evL$vectors)]
spc <- pam(Z,k)
colorSpectralTom <- labels2colors(spc$cluster)
intModules = table(colorSpectralTom)
for(J in 1:length(intModules)){
idx <- which(colorSpectralTom == names(intModules)[J])
DenseGenes = GeneNames[idx]
densegenefile <- paste(foldername,"/DenseModuleGene_",
indicatename,"_",J,".txt",sep="")
write.table(idx,file = paste(foldername,'/DenseModuleGeneID_',indicator,'_',J,'.txt',sep=''),
quote = FALSE, row.names = FALSE, col.names = FALSE)
write.table(DenseGenes,densegenefile,sep = "\n",col.names = FALSE,
row.names = FALSE,quote = FALSE)
}
}
#' Modules detection by Louvain algorithm
#'
#' Module detection based on the Louvain algorithm, which tries to maximize
#' overall modularity of resulting partition.
#'
#' @param datExpr gene expression profile, rows are samples and columns genes
#' @param foldername where to store the clusters
#' @param indicatename normally a specific tag of condition
#' @param GeneNames normally the gene official names to replace the colnames of datExpr
#' @param maxsize the maximal nodes allowed in one module
#' @param minsize the minimal nodes allowed in one module
#' @param power the power parameter of WGCNA, W_{ij}=|cor(x_i,x_j)|^power
#' @param tao the threshold to cut the adjacency matrix
#'
#' @return The number of clusters
#'
#' @references Blondel, Vincent D., et al. "Fast unfolding of communities in
#' large networks." Journal of statistical mechanics: theory and experiment
#' 2008.10 (2008): P10008.
#'
#' @author Dong Li, \email{dxl466@cs.bham.ac.uk}
#' @keywords cutting dendrogram
#'
#'
#' @import igraph
#' @examples
#' data(synthetic)
#' ResultFolder <- 'ForSynthetic' # where middle files are stored
#' indicator <- 'X' # indicator for data profile 1
#' GeneNames <- colnames(datExpr1)
#' intModules1 <- WeightedModulePartitionLouvain(datExpr1,ResultFolder,indicator,GeneNames)
#' truemodule <- c(rep(1,100),rep(2,100),rep(3,100),rep(4,100),rep(5,100))
#' #mymodule <- getPartition(ResultFolder)
#' #randIndex(table(mymodule,truemodule),adjust=F)
#' @export
#'
WeightedModulePartitionLouvain <- function(datExpr,foldername,indicatename,GeneNames,
maxsize=200, minsize=30, power=6, tao=0.2){
ADJ <- abs(cor(datExpr,use="p"))^power
#ADJ <- sparse.cor(datExpr)^power
ADJ[ADJ < tao] <- 0
g <- graph_from_adjacency_matrix(ADJ,mode='undirected',weighted=TRUE)
V(g)$name=1:length(V(g))
dir.create(foldername, showWarnings = FALSE)
tmfile <- paste(foldername,'tmp.txt',sep='')
recursiveigraph(g,tmfile,'louvain',maxsize, minsize)
if(!file.exists(tmfile)){
print('Not suitable for Louvain! Try another module identification method')
return (0)
}else
{
num <- modulesRank(foldername,indicatename,GeneNames)
return (num)
}
}
#' Modules detection by AMOUNTAIN algorithm
#'
#' Module detection based on the AMOUNTAIN algorithm, which tries to find the
#' optimal module every time and use a modules extraction way
#'
#' @param datExpr gene expression profile, rows are samples and columns genes
#' @param Nmodule the number of clusters(modules)
#' @param foldername where to store the clusters
#' @param indicatename normally a specific tag of condition
#' @param GeneNames normally the gene official names to replace the colnames of datExpr
#' @param maxsize the maximal nodes allowed in one module
#' @param minsize the minimal nodes allowed in one module
#' @param power the power parameter of WGCNA, W_{ij}=|cor(x_i,x_j)|^pwr
#' @param tao the threshold to cut the adjacency matrix
#'
#' @return None
#'
#' @references Blondel, Vincent D., et al. "Fast unfolding of communities in
#' large networks." Journal of statistical mechanics: theory and experiment
#' 2008.10 (2008): P10008.
#'
#' @author Dong Li, \email{dxl466@cs.bham.ac.uk}
#' @keywords optimization
#'
#' @import AMOUNTAIN
#' @examples
#' data(synthetic)
#' ResultFolder <- 'ForSynthetic' # where middle files are stored
#' GeneNames <- colnames(datExpr1)
#' intModules1 <- WeightedModulePartitionAmoutain(datExpr1,5,ResultFolder,'X',
#' GeneNames,maxsize=100,minsize=50)
#' truemodule <- c(rep(1,100),rep(2,100),rep(3,100),rep(4,100),rep(5,100))
#' #mymodule <- getPartition(ResultFolder)
#' #randIndex(table(mymodule,truemodule),adjust=F)
#' @export
#'
WeightedModulePartitionAmoutain <- function(datExpr,Nmodule,foldername,indicatename,
GeneNames, maxsize=200, minsize=3, power=6, tao=0.2){
W <- abs(cor(datExpr,use="p"))^power
W[W < tao] <- 0
N = dim(W)[1]
z <- rep(0,N)
idx <- 1
dir.create(foldername, showWarnings = FALSE)
idxname <- 1:N
saveAtomfile <- paste(foldername,'/AtomModule',sep='')
for (ii in 1:Nmodule) {
abegin = 0.01
aend = 0.9
for (i in 1:20) {
#x <- CGPFixSS(W,z,rep(1/N,N),a=(abegin+aend)/2,lambda=0,maxiter = 50)
x <- moduleIdentificationGPFixSS(W,z,rep(1/N,N),a=(abegin+aend)/2,
lambda=0,maxiter = 50)
predictedid <- which(x[[2]]!=0)
if(length(predictedid) > maxsize){
abegin = (abegin+aend)/2
}else if (length(predictedid) < minsize){
aend = (abegin+aend)/2
}else
break
}
if(length(predictedid) <= maxsize){
modulescore = sum(W[predictedid,predictedid])
write.table(as.character(idxname[predictedid]),file = paste(foldername,
'/DenseModuleGeneID_',indicatename,'_',idx,'.txt',sep=''),
quote = FALSE, row.names = FALSE, col.names = FALSE)
write.table(GeneNames[predictedid],file = paste(foldername,'/',
floor(modulescore),'-moduleid-',idx,'.txt',sep=''),
quote = FALSE, row.names = FALSE, col.names = FALSE)
idx <- idx+1
}
else if(length(predictedid) > maxsize){
modulescoreW = W[predictedid,predictedid]
print(paste('Atom! with size',length(predictedid),sep=' '))
tmpstr = as.numeric(GeneNames[predictedid])-1
forTotalcompletegraph(tmpstr,modulescoreW,saveAtomfile)
}
W = W[-predictedid,-predictedid]
GeneNames = GeneNames[-predictedid]
z = z[-predictedid]
idxname = idxname[-predictedid]
N = length(GeneNames)
print(paste('Finishing module ',ii,sep=''))
if(N < 3 | sum(W)==0)
break
}
}
#' Illustration of two networks comparison
#'
#' Compare the background network and a condition-specific network. A Jaccard
#' index is used to measure the similarity of two sets, which represents the
#' similarity of each module pairs from two networks.
#'
#' @param sourcehead prefix of where to store results
#' @param nm1 how many modules in the background network
#' @param nm2 how many modules in the condition-specific network
#' @param ind1 indicator of the background network
#' @param ind2 indicator of the condition-specific network
#'
#' @return A matrix where each entry is the Jaccard index of corresponding
#' modules from two networks
#'
#' @author Dong Li, \email{dxl466@cs.bham.ac.uk}
#' @keywords module comparison
#'
#' @examples
#' data(synthetic)
#' ResultFolder = 'ForSynthetic' # where middle files are stored
#' CuttingCriterion = 'Density' # could be Density or Modularity
#' indicator1 = 'X' # indicator for data profile 1
#' indicator2 = 'Y' # indicator for data profile 2
#' intModules1 <- WeightedModulePartitionHierarchical(datExpr1,ResultFolder,
#' indicator1,CuttingCriterion)
#' intModules2 <- WeightedModulePartitionHierarchical(datExpr2,ResultFolder,
#' indicator2,CuttingCriterion)
#' JaccardMatrix <- comparemodulestwonets(ResultFolder,intModules1,intModules2,
#' paste('/DenseModuleGene_',indicator1,sep=''),
#' paste('/DenseModuleGene_',indicator2,sep=''))
#'
#' @export
#'
comparemodulestwonets <- function(sourcehead,nm1,nm2,ind1,ind2){
my.array<-array(0,dim=c(nm1,nm2))
for(i1 in 1:nm1){
for (i2 in 1:nm2){
# sourcehead = "Networks/DenseModuleGene_"
densegenefile1 <- paste(sourcehead,ind1,"_",i1,".txt",sep="")
densegenefile2 <- paste(sourcehead,ind2,"_",i2,".txt",sep="")
list1 <- readLines(densegenefile1)
list2 <- readLines(densegenefile2)
my.array[i1,i2] = length(intersect(list1,list2))/
length(union(list1,list2))
}
}
return (my.array)
}
#' Illustration of network comparison
#'
#' Compare the background network and a set of condition-specific network.
#' Conserved or condition-specific modules are indicated by the plain files,
#' based on the statistics
#'
#' @param ResultFolder where to store results
#' @param intModules how many modules in the background network
#' @param indicator identifier of current profile, served as a tag in name
#' @param intconditionModules a numeric vector, each of them is the number
#' of modules in each condition-specific network. Or just single number
#' @param conditionNames a character vector, each of them is the name
#' of condition. Or just single name
#' @param specificTheta the threshold to define min(s)+specificTheta,
#' less than which is considered as condition specific module.
#' s is the sums of rows in Jaccard index matrix. See supplementary file.
#' @param conservedTheta The threshold to define max(s)-conservedTheta,
#' greater than which is considered as condition conserved module.
#' s is the sums of rows in Jaccard index matrix. See supplementary file.
#'
#' @return None
#'
#' @author Dong Li, \email{dxl466@cs.bham.ac.uk}
#' @seealso \code{\link{WeightedModulePartitionHierarchical}},
#' \code{\link{comparemodulestwonets}}
#' @keywords module differential
#'
#' @examples
#' data(synthetic)
#' ResultFolder = 'ForSynthetic' # where middle files are stored
#' CuttingCriterion = 'Density' # could be Density or Modularity
#' indicator1 = 'X' # indicator for data profile 1
#' indicator2 = 'Y' # indicator for data profile 2
#' specificTheta = 0.1 #threshold to define condition specific modules
#' conservedTheta = 0.1#threshold to define conserved modules
#' intModules1 <- WeightedModulePartitionHierarchical(datExpr1,ResultFolder,
#' indicator1,CuttingCriterion)
#' intModules2 <- WeightedModulePartitionHierarchical(datExpr2,ResultFolder,
#' indicator2,CuttingCriterion)
#' CompareAllNets(ResultFolder,intModules1,indicator1,intModules2,indicator2,
#' specificTheta,conservedTheta)
#'
#' @export
#'
CompareAllNets <- function(ResultFolder,intModules,indicator,
intconditionModules,conditionNames,specificTheta,
conservedTheta){
for (i in 1:length(conditionNames)) {
ArrayGroup1 <- comparemodulestwonets(ResultFolder,intModules,
intconditionModules[i],paste('/DenseModuleGeneID_',
indicator,sep=''),paste('/DenseModuleGeneID_',
conditionNames[i],sep=''))
dir.create(paste(ResultFolder,'/',conditionNames[i],sep=''), showWarnings = FALSE)
fileprefix <- paste(ResultFolder,'/',conditionNames[i],'/',sep='')
#pdf(paste(fileprefix,'module_overlap_remove',conditionNames[i],
# '.pdf',sep=''),width = 10, height = 8)
png(paste(fileprefix,'module_overlap_remove',conditionNames[i],
'.png',sep=''))
plot(1:intModules,rowSums(ArrayGroup1),xlim = c(0,(intModules + 1)),
ylim = c(0,max(rowSums(ArrayGroup1)) + 0.1),
xlab="Module ID",ylab = 'RowSums of similarity matrix',
type = 'n',main="Overlapped ratio")
lines(1:intModules, rowSums(ArrayGroup1), col = 'black', type = "b")
abline(h = min(rowSums(ArrayGroup1)) + 0.1, col='red')
abline(h = max(rowSums(ArrayGroup1)) - 0.1, col='green')
text(5, min(rowSums(ArrayGroup1)) - 0.05, "condition response",
col = "red",cex = 0.75)
text(5, max(rowSums(ArrayGroup1)) + 0.05, "conserved response",
col = "green",cex = 0.75)
dev.off()
specificmoduleid <- which(rowSums(ArrayGroup1) <=
min(rowSums(ArrayGroup1)) + specificTheta)
conservedmoduleid <- which(rowSums(ArrayGroup1) >=
max(rowSums(ArrayGroup1)) - conservedTheta)
write.table(specificmoduleid,file = paste(fileprefix,
'sepcificModuleid.txt',sep=''),
row.names = FALSE, col.names = FALSE)
write.table(conservedmoduleid,file = paste(fileprefix,
'conservedModuleid.txt',sep=''),
row.names = FALSE, col.names = FALSE)
}
}
#' Statistics of all conditions
#'
#' Statistics of all conditions. To highlight conserved or condition-specific
#' by counting how frequent each module is lablelled as which, and then
#' visualize the frequency by bar plot.
#'
#' @param ResultFolder where to store results
#' @param intModules how many modules in the background network
#' @param indicator identifier of current profile, served as a tag in name
#' @param conditionNames a character vector, each of them is the name
#' @param legendNames a character vector, each of them is the condition name
#' showing up in the frequency barplot
#' of condition. Or just single name
#' @return None
#'
#' @author Dong Li, \email{dxl466@cs.bham.ac.uk}
#' @seealso \code{\link{WeightedModulePartitionHierarchical}},
#' \code{\link{WeightedModulePartitionLouvain}},
#' \code{\link{WeightedModulePartitionSpectral}},
#' \code{\link{WeightedModulePartitionAmoutain}},
#' \code{\link{CompareAllNets}}
#' @keywords module differential Statistics
#'
#' @import RColorBrewer
#'
#' @export
#'
ModuleFrequency <- function(ResultFolder,intModules, conditionNames,
legendNames, indicator){
Ncon <- length(conditionNames)
wide <- matrix (0,nrow = Ncon, ncol = intModules)
for (i in 1:Ncon) {
fileprefix <- paste(ResultFolder,'/',conditionNames[i],'/',sep='')
#tad <- read.table(paste(fileprefix,'sepcificModuleid.txt',sep=''),header = TRUE, sep = "")
tad <- as.numeric(readLines(paste(fileprefix,'sepcificModuleid.txt',sep='')))
#moduleid <-tad[,1]
wide[i,tad] <- 1
}
idx1 <- which(colSums(wide)!=0)
# sequential <- brewer.pal(length(conditionNames), "Greens")
# sequential <- brewer.pal(length(conditionNames), col=c('black', 'white', 'blue', 'red', 'yellow', 'purple', 'green'))
# sequential <- c('black', 'white', 'blue', 'red', 'yellow', 'purple', 'green')
if (Ncon > 9){
qual_col_pals <- brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector <- unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
sequential <- sample(col_vector, Ncon)
} else
sequential <- brewer.pal(Ncon, "Set1")
pdf(paste(ResultFolder,'/specificmembership.pdf',sep=''),width = 10, height = 5)
# png('specificmembership.png',width = 1000, height = 500)
barplot(wide[,idx1],
names.arg = idx1,
cex.names = 0.7, # makes x-axis labels small enough to show all
col = sequential, # colors
xlab = "module id",
ylab = "if in condition specific network",
xlim = c(0,length(idx1)), # these two lines allow space for the legend
main = 'Frequency of condition specific module',
width = 0.75) # these two lines allow space for the legend
legend("topleft",
legend = legendNames, #in order from top to bottom
fill = sequential, # 6:1 reorders so legend order matches graph
title = "conditions",cex = 0.75)
dev.off()
for (ispec in 1:Ncon) {
idx1 <- intersect(which(colSums(wide)!=0), which(colSums(wide) <= ispec))
uniqueaffiliation <- vector('list',Ncon)
names (uniqueaffiliation) <- conditionNames
for (i in 1:length(idx1)) {
tmp <- which(wide[,idx1[i]] == 1)
for (j in 1:length(tmp)) {
uniqueaffiliation[[tmp[j]]] <- union(uniqueaffiliation[[tmp[j]]],idx1[i])
}
}
write(paste('If we allow at most ', ispec ,' share one condition\n',sep=''),
file = paste(ResultFolder,'/specificmembership.txt',sep=''),append=TRUE)
for (i in 1:Ncon) {
write(conditionNames[i],file = paste(ResultFolder,'/specificmembership.txt',sep=''),append=TRUE)
if(is.null(uniqueaffiliation[[i]])){
write('NULL',file = paste(ResultFolder,'/specificmembership.txt',sep=''),append = TRUE)
}else{
write(uniqueaffiliation[[i]],file = paste(ResultFolder,'/specificmembership.txt',sep=''),append = TRUE)
}
write('\n',file = paste(ResultFolder,'/specificmembership.txt',sep=''),append=TRUE)
}
}
wide2 <- matrix (0,nrow = Ncon, ncol = intModules)
for (i in 1:length(conditionNames)) {
fileprefix <- paste(ResultFolder,'/',conditionNames[i],'/',sep='')
#tad <- read.table(paste(fileprefix,'conservedModuleid.txt',sep=''),header = TRUE, sep = "")
#moduleid <- tad[,1]
tad <- as.numeric(readLines(paste(fileprefix,'conservedModuleid.txt',sep='')))
wide2[i,tad] <- 1
#wide2[i,moduleid] <- 1
}
idx2 <- which(colSums(wide2)!=0)
#sequential <- c('black', 'white', 'blue', 'red', 'yellow', 'purple', 'green')
#sequential <- brewer.pal(Ncon, "Set1")
#sequential <- sample(col_vector, Ncon)
pdf(paste(ResultFolder,'/conservedmembership.pdf',sep=''),width = 10, height = 5)
# png('specificmembership.png',width = 1000, height = 500)
barplot(wide2[,idx2],
names.arg = idx2,
cex.names = 0.7, # makes x-axis labels small enough to show all
col = sequential, # colors
xlab = "module id",
ylab = "if in condition specific network",
xlim = c(0,length(idx2)), # these two lines allow space for the legend
main = 'Frequency of conserved module',
width = 0.75) # these two lines allow space for the legend
legend("topleft",
legend =legendNames, #in order from top to bottom
fill = sequential, # 6:1 reorders so legend order matches graph
title = "conditions",cex = 0.75)
dev.off()
idx <- sort(union(idx1,idx2))
pdf(paste(ResultFolder,'/membership.pdf',sep=''),width = 10, height = 5)
plot(1:length(idx),xaxt = "n",yaxt = "n",
xlim = c(1,length(idx)),ylim = c(-max(colSums(wide2)),max(colSums(wide))),type = "n",
xlab='Module ID',ylab='Membership',main = 'Specification of conditon-specific and conserved module')
#axis(1, at = seq(1, intModules, by = 1), labels=1:intModules)
bp <- barplot(wide[,idx],axes=FALSE,
names.arg = idx,
cex.names = 0.7, # makes x-axis labels small enough to show all
col = sequential, # colors
width = 0.75,
add = TRUE) # these two lines allow space for the legend
barplot(-wide2[,idx],axes=FALSE,
col = sequential, # colors
width = 0.75,add = TRUE)
legend("bottomleft",
legend = legendNames, #in order from top to bottom
fill = sequential, # 6:1 reorders so legend order matches graph
cex = 0.75,bty = "n")# without borders
text(length(idx)-1, 1, 'Conditon specific',cex = 0.75)
text(length(idx)-1, -1, 'Conserved',cex = 0.75)
dev.off()
write.table(idx2,file = paste(ResultFolder,'/conservedmembership.txt',sep=''),
row.names = FALSE, col.names = FALSE)
dir.create(paste(ResultFolder,'/','interestedModules',sep=''))
for (i in idx1) {
file.copy(paste(ResultFolder,"/DenseModuleGene_",indicator,"_",i,".txt",sep=""),
paste(ResultFolder,'/','interestedModules',sep=''))
}
for (i in idx2) {
file.copy(paste(ResultFolder,"/DenseModuleGene_",indicator,"_",i,".txt",sep=""),
paste(ResultFolder,'/','interestedModules',sep=''))
}
}
#' datExpr1
#'
#' Synthetic gene expression profile with 20 samples and 500 genes.
#'
#' @name datExpr1
#' @docType data
#' @author Dong Li, \email{dxl466@cs.bham.ac.uk}
#' @keywords data
#' @format A matrix with 20 rows and 500 columns.
#' @examples
#' data(synthetic)
#' ## plot the heatmap of the correlation matrix ...
#' \dontrun{heatmap(cor(as.matrix(datExpr1)))}
NULL
#' datExpr2
#'
#' Synthetic gene expression profile with 25 samples and 500 genes.
#'
#' @name datExpr2
#' @docType data
#' @author Dong Li, \email{dxl466@cs.bham.ac.uk}
#' @keywords data
#' @format A matrix with 25 rows and 500 columns.
#' @examples
#' data(synthetic)
#' ## plot the heatmap of the correlation matrix ...
#' \dontrun{heatmap(cor(as.matrix(datExpr2)))}
NULL
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.