`runCLEAN` <-
function(data = NULL,
rclust = NULL,
functionalCategories = NULL,
bkgList = NULL,
file = "cluster",
maxSize=1000,
minSize=10,
maxNumOfClusters=NULL,
minGenesInCategory=10,
maxGenesInCategory=1000,
inBkg=TRUE,
sigFDR = 0.1,
species = NULL,
estimateNullDistribution = FALSE,
verbose=TRUE,
saveDataObjects = FALSE) {
if(is.null(data)) {
if(file.access(paste(file, "RData", sep = "."))==0)
data <- paste(file, "RData", sep = ".")
else if(file.access(paste(file, "cdt", sep = "."))==0)
data <- paste(file, "cdt", sep = ".")
else
data <- paste(file, "txt", sep = ".")
}
fromCdtFile <- FALSE
if(is.character(data)) { #is a file name
if(substr(data,nchar(data)-4,nchar(data)) == "RData") {
e1 <- new.env()
load(file = data, envir = e1)
data <- get(ls(envir = e1, pattern = "gimmOut")[1], envir = e1)
}
else if(substr(data,nchar(data)-2,nchar(data)) == "cdt") {
data <- cdt2r(data)
fromCdtFile <- TRUE
} else {
data <- read.table(file = data, sep = "\t", header = TRUE, stringsAsFactors = FALSE)
}
}
if(is.matrix(data)) {
cdt <- cbind(ID=rownames(data), NAME=as.character(rownames(data)), data) #rownames are assumed to be gene IDs
} else if(is.data.frame(data)) {
if(is.numeric(data[,2]) & #second column contains data
length(grep("descr", colnames(data)[2], ignore.case=TRUE)) <= 0 &
length(grep("name", colnames(data)[2], ignore.case=TRUE)) <= 0) {
if(length(grep("id",colnames(data)[1], ignore.case=TRUE)) > 0 | #if first column contains gene id
length(grep("acc",colnames(data)[1], ignore.case=TRUE)) > 0 |
!is.numeric(data[, 1])) {
cdt <- cbind(ID=data[,1], NAME=as.character(data[,1]), data[,-1])
warning("data[, 2] is numeric. Assuming data[, 1] = ID column, data[, 2] data column")
} else { #first column contains data
cdt <- cbind(ID=rownames(data), NAME=as.character(rownames(data)), data) #rownames are assumed to be gene IDs
warning("data[, 1:2] are numeric. Assuming all columns to contain data.")
}
} else {
cdt <- data
}
} else { #is gimmOut object
cdt <- data$clustData
if(is.null(rclust)) rclust <- data$hGClustData
}
if(is.null(rclust)) rclust <- "euclidean" #default is hclust with euclidean distance
run.CLEAN = TRUE
if(length(rclust) == 1) {
if(is.na(rclust)) { #no gene clustering specified
run.CLEAN <- FALSE
}
}
if(is.character(rclust)) { #is filename or dist method
if(substr(rclust,nchar(rclust)-2,nchar(rclust)) == "gtr") { #import gtr file
rclust <- gtr2r(rclust)
} else if(substr(rclust,nchar(rclust)-1,nchar(rclust)) == "zm") { #import zm file
rclust <-data.matrix(scan(rclust))
rclust <- 1 - matrix(rclust,sqrt(length(rclust)))
} else {
rclust <- dist(cdt[,-(1:2)], method = rclust) #compute distance matrix from data
fromCdtFile <- FALSE
}
}
if(is.numeric(rclust)) {
gtr <- hclust(as.dist(rclust), method="average")
gtr$labels <- 1:nrow(cdt)
}
else gtr <- rclust
if(fromCdtFile) cdt <- cdt[order(gtr$order),]
if(is.null(gtr$labels)) gtr$labels <- 1:nrow(cdt)
if(is.character(gtr$labels)) {
gtr$labels <- 1:nrow(cdt)
warning("rclust$labels are character, reassigned as rclust$labels<-1:nrow(cdt)")
}
if(length(intersect(gtr$labels, 1:nrow(cdt))) < nrow(cdt)) {
gtr$labels <- 1:nrow(cdt)
warning("rclust$labels incompatible, reassigned as rclust$labels<-1:nrow(cdt)")
}
if(is.null(functionalCategories)) functionalCategories <- "GO" #default is GO categories
if(length(functionalCategories) == 1) {
if(is.na(functionalCategories)) { #no functional categories specified
run.CLEAN <- FALSE
}
}
if(run.CLEAN) {
if(!is.list(functionalCategories)) functionalCategories <- as.list(unique(functionalCategories))
CategoryID2GeneID <- list()
CategoryID2Desc <- list()
run.CLEAN <- rep(TRUE, length(functionalCategories))
for(i in 1:length(functionalCategories)) {
if(is.character(functionalCategories[[i]])) {
if(substr(functionalCategories[[i]],nchar(functionalCategories[[i]])-4,nchar(functionalCategories[[i]])) == "RData") {
e1 <- new.env()
load(file = functionalCategories[[i]], envir = e1)
if(length(ls(envir = e1, pattern = "2[G,g][E,e][N,n][E,e]")) > 0) {
l <- list(get(ls(envir = e1, pattern = "2[G,g][E,e][N,n][E,e]")[1], envir = e1))
names(l) <- names(functionalCategories)[i]
if(is.null(names(l))) names(l) <- substr(functionalCategories[[i]], 1, nchar(functionalCategories[[i]])-6)
CategoryID2GeneID <- c(CategoryID2GeneID, l)
} else {
run.CLEAN[i] <- FALSE
CategoryID2GeneID <- c(CategoryID2GeneID, list(NA))
warning(paste("No functional categories found in file", functionalCategories[[i]], "- No functional clustering annotation generated."))
}
if(length(ls(envir = e1, pattern = "2[D,d][E,e][S,s][C,c]")) > 0) {
l <- list(get(ls(envir = e1, pattern = "2[D,d][E,e][S,s][C,c]")[1], envir = e1))
names(l) <- names(functionalCategories)[i]
if(is.null(names(l))) names(l) <- substr(functionalCategories[[i]], 1, nchar(functionalCategories[[i]])-6)
CategoryID2Desc <- c(CategoryID2Desc, l)
} else CategoryID2Desc <- c(CategoryID2Desc, list(NA))
} else {
l1 <- getFunctionalCategories(functionalCategories[[i]], species)
if(length(l1) > 0) {
l <- l1[[1]][1]
names(l) <- functionalCategories[[i]]
CategoryID2GeneID <- c(CategoryID2GeneID, l)
if(length(l1[[1]]) > 1) {
l <- l1[[1]][2]
names(l) <- functionalCategories[[i]]
CategoryID2Desc <- c(CategoryID2Desc, l)
}
else CategoryID2Desc <- c(CategoryID2Desc, list(NA))
} else {
run.CLEAN[i] <- FALSE
CategoryID2GeneID <- c(CategoryID2GeneID, list(NA))
CategoryID2Desc <- c(CategoryID2Desc, list(NA))
warning(paste("functional categories", functionalCategories[[i]], "not found. No functional clustering annotation generated."))
}
}
} else { #functionalCategories[[i]] is a list
l <- functionalCategories[[i]][1]
names(l) <- names(functionalCategories)[i]
CategoryID2GeneID <- c(CategoryID2GeneID, l)
if(length(functionalCategories[[i]]) > 1) {
l <- functionalCategories[[i]][2]
names(l) <- names(functionalCategories)[i]
CategoryID2Desc <- c(CategoryID2Desc, l)
}
else CategoryID2Desc <- c(CategoryID2Desc, list(NA))
}
}
}
if(any(run.CLEAN)) {
fClustAnnotations <- list()
for(i in 1:length(run.CLEAN)) {
if(run.CLEAN[i]) {
l <- CLEAN(tree = gtr, allGenes=cdt[,1], CategoryID2GeneID=CategoryID2GeneID[[i]],
CategoryID2Desc = CategoryID2Desc[i], refGenes = unique(bkgList), maxSize=maxSize,
minSize=minSize, maxNumOfClusters=maxNumOfClusters, minGenesInCategory=minGenesInCategory,
maxGenesInCategory=maxGenesInCategory, verbose=verbose,inBkg=inBkg, sigFDR=sigFDR,
saveDataObjects=saveDataObjects, estimateNullDistribution = estimateNullDistribution)
names(l) <- names(CategoryID2GeneID)[i]
fClustAnnotations <- c(fClustAnnotations, l)
}
}
}
if(saveDataObjects) {
save(fClustAnnotations, file = paste(file,"RData",sep ="."))
}
invisible(fClustAnnotations)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.