`generateTreeViewFiles` <-
function(data = NULL,
rclust = NULL,
cclust = NULL,
functionalCategories = NULL,
fClustAnnotations = NULL,
bkgList = NULL,
file = "cluster",
maxSize=1000,
minSize=10,
maxNumOfClusters=NULL,
minGenesInCategory=10,
maxGenesInCategory=1000,
inBkg=TRUE,
sigFDR = 0.1,
addCLEANscore2cdt = TRUE,
species = NULL,
estimateNullDistribution = FALSE,
dec='.',
verbose=TRUE,
saveDataObjects = FALSE,
sampleDesc = NULL,
callTreeView = FALSE) {
if(is.null(data)) {
if(file.access(paste(file, "RData", sep = "."))==0)
data <- paste(file, "RData", sep = ".")
else if(file.access(paste(file, "cdt", sep = "."))==0)
data <- paste(file, "cdt", sep = ".")
else
data <- paste(file, "txt", sep = ".")
}
fromCdtFile <- FALSE
if(is.character(data)) { #is a file name
if(substr(data,nchar(data)-4,nchar(data)) == "RData") {
e1 <- new.env()
load(file = data, envir = e1)
data <- get(ls(envir = e1, pattern = "gimmOut")[1], envir = e1)
}
else if(substr(data,nchar(data)-2,nchar(data)) == "cdt") {
data <- cdt2r(data)
fromCdtFile <- TRUE
} else {
data <- read.table(file = data, sep = "\t", header = TRUE, stringsAsFactors = FALSE)
}
}
if(is.matrix(data)) {
cdt <- cbind(ID=rownames(data), NAME=as.character(rownames(data)), data) #rownames are assumed to be gene IDs
} else if(is.data.frame(data)) {
if(is.numeric(data[,2]) & #second column contains data
length(grep("descr", colnames(data)[2], ignore.case=TRUE)) <= 0 &
length(grep("name", colnames(data)[2], ignore.case=TRUE)) <= 0) {
if(length(grep("id",colnames(data)[1], ignore.case=TRUE)) > 0 | #if first column contains gene id
length(grep("acc",colnames(data)[1], ignore.case=TRUE)) > 0 |
!is.numeric(data[, 1])) {
cdt <- cbind(ID=data[,1], NAME=as.character(data[,1]), data[,-1])
warning("data[, 2] is numeric. Assuming data[, 1] = ID column, data[, 2] data column")
} else { #first column contains data
cdt <- cbind(ID=rownames(data), NAME=as.character(rownames(data)), data) #rownames are assumed to be gene IDs
warning("data[, 1:2] are numeric. Assuming all columns to contain data.")
}
} else {
cdt <- data
}
} else { #is gimmOut object
cdt <- data$clustData
if(is.null(rclust)) rclust <- data$hGClustData
if(is.null(cclust)) cclust <- data$hSClustData
}
make.gtr <- TRUE
if(is.null(rclust)) rclust <- "euclidean" #default is hclust with euclidean distance
if(length(rclust) == 1) {
if(is.na(rclust)) { #no gtr file desired but need dummy file
rclust <- hclust(dist(1:(nrow(cdt))))
rclust$order<-rclust$order[order(rclust$order)]
make.gtr <- FALSE
fromCdtFile <- FALSE
}
}
if(is.character(rclust)) { #is filename or dist method
if(substr(rclust,nchar(rclust)-2,nchar(rclust)) == "gtr") { #import gtr file
rclust <- gtr2r(rclust)
} else if(substr(rclust,nchar(rclust)-1,nchar(rclust)) == "zm") { #import zm file
rclust <-data.matrix(scan(rclust))
rclust <- 1 - matrix(rclust,sqrt(length(rclust)))
} else {
rclust <- dist(cdt[,-(1:2)], method = rclust) #compute distance matrix from data
fromCdtFile <- FALSE
}
}
if(is.numeric(rclust)) {
gtr <- hclust(as.dist(rclust), method="average")
gtr$labels <- 1:nrow(cdt)
}
else gtr <- rclust
if(fromCdtFile) cdt <- cdt[order(gtr$order),]
if(is.null(gtr$labels)) gtr$labels <- 1:nrow(cdt)
if(is.character(gtr$labels)) {
gtr$labels <- 1:nrow(cdt)
warning("rclust$labels are character, reassigned as rclust$labels<-1:nrow(cdt)")
}
if(length(intersect(gtr$labels, 1:nrow(cdt))) < nrow(cdt)) {
gtr$labels <- 1:nrow(cdt)
warning("rclust$labels incompatible, reassigned to rclust$labels<-1:nrow(cdt)")
}
make.atr <- TRUE
if(is.null(cclust)) cclust <- 1-cor(cdt[,-(1:2)]) #default: hclust w/ pearson corr as similarity measure
if(length(cclust) == 1) {
if(is.na(cclust)) { #no atr file desired but need dummy file
cclust <- hclust(dist(1:(ncol(cdt)-2)))
if(is.null(sampleDesc)) {
cclust$order<-cclust$order[order(cclust$order)]
} else {
cclust$order<-(1:length(cclust$order))[order(sampleDesc)]
}
make.atr <- FALSE
}
}
if(is.character(cclust)) { #is filename or dist method
if(substr(cclust,nchar(cclust)-2,nchar(cclust)) == "atr") { #import atr file
cclust <- atr2r(cclust)
} else if(substr(cclust,nchar(cclust)-1,nchar(cclust)) == "zm" |
substr(cclust,nchar(cclust)-2,nchar(cclust)) == "zm2") { #import zm file
cclust <-data.matrix(scan(cclust))
cclust <- 1 - matrix(cclust,sqrt(length(cclust)))
} else {
cclust <- dist(t(cdt[,-(1:2)]), method = cclust) #compute distance matrix from data
}
}
if(is.numeric(cclust)) atr <- hclust(as.dist(cclust), method = "average")
else atr <- cclust
runCLEAN <- TRUE
if(!is.null(fClustAnnotations)){
runCLEAN <- FALSE
if(is.character(fClustAnnotations)){ #is vector of (a) file name(s) for .RData object
fClustAnnotFiles <- fClustAnnotations
fClustAnnotations <- list()
for(i in 1:length(fClustAnnotFiles)) {
e1 <- new.env()
if(file.exists(fClustAnnotFiles[i])) {
load(file = fClustAnnotFiles[i], envir = e1)
fClustAnnotations<- c(fClustAnnotations, get("fClustAnnotations", envir = e1))
} else {
warning("File ", fClustAnnotFiles[i], " not found. Functional Cluster Annotations not loaded.")
}
}
}
}
make.fni <- TRUE
if(is.null(functionalCategories)) functionalCategories <- "GO" #default is GO categories
if(length(functionalCategories) == 1) {
if(is.na(functionalCategories)) { #no fni file desired
runCLEAN <- FALSE
make.fni <- FALSE
}
}
if(runCLEAN) {
if(!is.list(functionalCategories)) functionalCategories <- as.list(unique(functionalCategories))
CategoryID2GeneID <- list()
CategoryID2Desc <- list()
runCLEAN <- rep(TRUE, length(functionalCategories))
for(i in 1:length(functionalCategories)) {
if(is.character(functionalCategories[[i]])) {
if(substr(functionalCategories[[i]],nchar(functionalCategories[[i]])-4,nchar(functionalCategories[[i]])) == "RData") {
e1 <- new.env()
load(file = functionalCategories[[i]], envir = e1)
if(length(ls(envir = e1, pattern = "2[G,g][E,e][N,n][E,e]")) > 0) {
l <- list(get(ls(envir = e1, pattern = "2[G,g][E,e][N,n][E,e]")[1], envir = e1))
names(l) <- names(functionalCategories)[i]
if(is.null(names(l))) names(l) <- substr(functionalCategories[[i]], 1, nchar(functionalCategories[[i]])-6)
CategoryID2GeneID <- c(CategoryID2GeneID, l)
} else {
runCLEAN[i] <- FALSE
CategoryID2GeneID <- c(CategoryID2GeneID, list(NA))
warning(paste("No functional categories found in file", functionalCategories[[i]], "- No functional clustering annotation generated."))
}
if(length(ls(envir = e1, pattern = "2[D,d][E,e][S,s][C,c]")) > 0) {
l <- list(get(ls(envir = e1, pattern = "2[D,d][E,e][S,s][C,c]")[1], envir = e1))
names(l) <- names(functionalCategories)[i]
if(is.null(names(l))) names(l) <- substr(functionalCategories[[i]], 1, nchar(functionalCategories[[i]])-6)
CategoryID2Desc <- c(CategoryID2Desc, l)
} else CategoryID2Desc <- c(CategoryID2Desc, list(NA))
} else {
l1 <- getFunctionalCategories(functionalCategories[[i]], species)
if(length(l1) > 0) {
l <- l1[[1]][1]
names(l) <- functionalCategories[[i]]
CategoryID2GeneID <- c(CategoryID2GeneID, l)
if(length(l1[[1]]) > 1) {
l <- l1[[1]][2]
names(l) <- functionalCategories[[i]]
CategoryID2Desc <- c(CategoryID2Desc, l)
} else CategoryID2Desc <- c(CategoryID2Desc, list(NA))
} else {
runCLEAN[i] <- FALSE
CategoryID2GeneID <- c(CategoryID2GeneID, list(NA))
CategoryID2Desc <- c(CategoryID2Desc, list(NA))
warning(paste("functional categories", functionalCategories[[i]], "not found. No functional clustering annotation generated."))
}
}
} else { #functionalCategories[[i]] is a list
l <- functionalCategories[[i]][1]
names(l) <- names(functionalCategories)[i]
CategoryID2GeneID <- c(CategoryID2GeneID, l)
if(length(functionalCategories[[i]]) > 1) {
l <- functionalCategories[[i]][2]
names(l) <- names(functionalCategories)[i]
CategoryID2Desc <- c(CategoryID2Desc, l)
}
else CategoryID2Desc <- c(CategoryID2Desc, list(NA))
}
}
}
r2cdt(gtr,atr,cdt,labels=TRUE, description=TRUE,file=paste(file, "cdt",sep="."),dec=dec)
if(!make.atr & !is.null(sampleDesc)) {
cdtFromFile <- read.table(file=paste(file, "cdt",sep="."), header = FALSE, sep = "\t", quote = "", stringsAsFactors = FALSE, check.names=FALSE)
data.start.col <- which(cdtFromFile[1,] == "GWEIGHT") + 1
if (length(data.start.col) < 1) {
if (colnames(cdt)[1] == "GID")
data.start.col <- 4
else data.start.col <- 3
}
newCdt <- cdtFromFile[,1:(data.start.col-1)]
l <- data.start.col
lSampleDesc <- levels(as.factor(sampleDesc))
for(f in lSampleDesc[-length(lSampleDesc)]) {
l <- l + sum(sampleDesc == f)
newCdt <- cbind(newCdt, cdtFromFile[,data.start.col:(l-1)], "")
data.start.col <- l
}
l <- l + sum(sampleDesc == lSampleDesc[length(lSampleDesc)])
newCdt <- cbind(newCdt, cdtFromFile[,data.start.col:(l-1)])
write.table(newCdt, file=paste(file, "cdt",sep="."), sep="\t", row.names=FALSE, col.names=FALSE, quote=FALSE)
}
if(make.atr)
r2atr(atr,file=paste(file,"atr",sep ="."), dec=dec)
if(make.gtr) {
r2gtr(gtr,file=paste(file,"gtr",sep ="."), dec=dec)
if(make.fni) {
if(any(runCLEAN)) {
fClustAnnotations <- list()
for(i in 1:length(runCLEAN)) {
if(runCLEAN[i]) {
l <- CLEAN(tree = gtr, allGenes=cdt[,1], CategoryID2GeneID=CategoryID2GeneID[[i]],
CategoryID2Desc = CategoryID2Desc[i], refGenes = unique(bkgList), maxSize=maxSize,
minSize=minSize, maxNumOfClusters=maxNumOfClusters, minGenesInCategory=minGenesInCategory,
maxGenesInCategory=maxGenesInCategory, verbose=verbose,inBkg=inBkg, sigFDR=sigFDR,
saveDataObjects=saveDataObjects, estimateNullDistribution = estimateNullDistribution)
names(l) <- names(CategoryID2GeneID)[i]
fClustAnnotations <- c(fClustAnnotations, l)
}
}
}
if(length(fClustAnnotations) > 0) {
if(addCLEANscore2cdt) {
cwCLEANscore.all <- rep(0, dim(cdt)[1])
for(i in 1:length(fClustAnnotations)) {
#if(any(fClustAnnotations[[i]]$minFDR > 0))
# minFDR <- pruneMinFDR(fClustAnnotations[[i]])
#else
# minFDR <- fClustAnnotations[[i]]$sminFDR
cwCLEANscore <- fClustAnnotations[[i]]$cwCLEANscore
if(!is.null(cwCLEANscore)) {
cwCLEANscore[cwCLEANscore > 300] <- 300
sigLevels <- c(1, 3, 5, 10, 15, 20, 50, 100, 150, 200, 250, 299)
if(max(cwCLEANscore) > 0) {
newCdt<-insertCdt(cdtFile=paste(file, "cdt", sep = "."), insertVector = "", tree=gtr,columnName="", newCdtFile=NULL) #insert blank column
for(sigLevel in sigLevels[sigLevels < max(cwCLEANscore)]) {
newCdt<-insertCdt(cdtFile=paste(file, "cdt", sep = "."), insertVector=cwCLEANscore-sigLevel,
tree=gtr,columnName=paste(names(fClustAnnotations)[i], ": CLEANscore-", sigLevel, sep = ""), newCdtFile=NULL)
}
}
cwCLEANscore.all <- apply(cbind(cwCLEANscore.all,cwCLEANscore), 1, max)
}
}
if(length(fClustAnnotations) > 1 & max(cwCLEANscore.all) > 0) {
sigLevels <- c(1, 3, 5, 10, 15, 20, 50, 100, 150, 200, 250, 299)
newCdt<-insertCdt(cdtFile=paste(file, "cdt", sep = "."), insertVector = "", tree=gtr,columnName="", newCdtFile=NULL) #insert blank column
for(sigLevel in sigLevels[sigLevels < max(cwCLEANscore.all)]) {
newCdt<-insertCdt(cdtFile=paste(file, "cdt", sep = "."), insertVector=cwCLEANscore.all-sigLevel,
tree=gtr,columnName=paste("Overall CLEANscore-", sigLevel, sep = ""), newCdtFile=NULL)
}
}
}
r2fni(gtr, clustCategories = fClustAnnotations, file=paste(file, "fni", sep = "."))
}
}
}
if(callTreeView) call.treeview(paste(file,"cdt",sep ="."))
if(saveDataObjects) {
gimmOut <- NULL
gimmOut$clustData <- cdt
gimmOut$hGClustData <- gtr
gimmOut$hSClustData <- atr
save(gimmOut, fClustAnnotations, file = paste(file,"RData",sep ="."))
}
invisible(list(data = cdt, rclust = gtr, cclust = atr, fClustAnnotations= fClustAnnotations))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.