#' ClusterGeneSets
#'
#' @import stats
#' @import methods
#' @import limma
#'
#' @importFrom stats dist hclust kmeans
#'
#' @param Object A PathwayObject
#' @param clusters A numeric with the number of clusters required
#' @param method The cluster method specified, Options are kmeans, kmeans_group,
#' Hierarchical and Hierarchical_group
#' @param order How should the data be ordered, by group or by cluster
#' @param molecular.signature to filter the molecular signatures which are duplicated
#' before clustering. options are Unique or All
#' @param user_function A user supplied function for clustering
#'
#'
#' @return PathwayObject
#' @export
#'
#' @examples
#' Great.files <- c(system.file("extdata", "MM10.GREAT.KO.uGvsMac.bed.tsv",
#' package = "GeneSetCluster"),
#' system.file("extdata", "MM10.GREAT.KO.uGvsMac.bed_BCKGRND.tsv", package = "GeneSetCluster"),
#' system.file("extdata", "MM10.GREAT.WT.uGvsMac.bed.tsv", package = "GeneSetCluster"),
#' system.file("extdata", "MM10.GREAT.WT.uGvsMac.bed_BCKGRND.tsv", package = "GeneSetCluster"))
#' Great.files.bckgrnd <- Great.files[grepl("BCKGRND", Great.files)]
#'
#'
#' Great.bckgnrd.Object1 <- LoadGeneSets(file_location = Great.files.bckgrnd,
#' groupnames= c("KO", "WT"),
#' P.cutoff = 0.05,
#' Mol.cutoff = 5,
#' Source = "Great",
#' Great.Background = TRUE,
#' type = "Canonical_Pathways",
#' topranks = 20,
#' structure = "SYMBOL",
#' Organism = "org.Mm.eg.db",
#' seperator = ",")
#' man.Great.Object1 <- ManageGeneSets(Object = Great.bckgnrd.Object1,
#' keep.type =c("Disease Ontology",
#' "GO Biological Process" ),
#' exclude.type="")
#' man.Great.Object2 <- CombineGeneSets(Object = man.Great.Object1)
#' man.Great.Object3 <- ClusterGeneSets(Object = man.Great.Object2,
#' clusters = 5,
#' method = "kmeans")
#'
#'
#'
ClusterGeneSets <- function(Object, clusters = 5, method = "kmeans", order = "group", molecular.signature = "All", user_function)
{
message("[=========================================================]")
message("[<<<< ClusterGeneSets START >>>>>]")
message("-----------------------------------------------------------")
if(nrow(Object@Data.RR)==0)
{
message("Make sure youre object has been combines by CombineGeneSets")
stop()
}
#Currently supported ways of clustering
#Kmeans #kmeans_group #mclust #mclust_group #Hierarchical #Hierarchical_group
#Correct spelling mistakes#
if(method %in% c("kmeans", "Kmeans")){method <- "kmeans"}
if(method %in% c("kmeans_group","Kmeans_group","Kmeans_Group", "kmeans group","Kmeans group")){method <- "kmeans_group"}
#
if(method %in% c("HierArchical", "hierarchical", "Hierarchical")){method <- "Hierarchical"}
if(method %in% c("Hierarchical_group","hierarchical_group", "Hierarchical group","hierarchical group", "hierarchical_group")){method <- "Hierarchical_group"}
#####################################################
if(clusters > nrow(Object@Data[[1]]))
{
message("Too many cluster inputted, reducing it to the number of datapoints")
clusters <- nrow(Object@Data[[1]])
}
if((length(clusters) > 1 & !grepl("group", method)))
{
message("Too many different cluster options inputted, reducing it to the number of datapoints")
clusters <- clusters[1]
}
if((!length(clusters) == nrow(Object@PData) & grepl("group", method)))
{
message("please input a vector with the same length as the number of groups, reducing it to the 1st number")
clusters <- clusters[1]
}
if(sum(method %in% c("kmeans", "kmeans_group", "Hierarchical", "Hierarchical_group")) < 1)
{
message("Method is not a supported method of clustering")
stop()
}
if(length(method) > 1)
{
message("Too many methods supplied, pick between Kmeans, kmeans_group, mclust, mclust_group")
stop()
}
####################################
#------------Combine DF------------#
####################################
canonical.df <-Object@Data[[1]]
if(molecular.signature == "All")
{
message("Using all Gene sets")
canonical.df <-Object@Data[[1]]
}
if(molecular.signature == "Unique")
{
message("Using Unique Gene sets")
if(Object@metadata[,"display"] == "Condensed")
{
warning("Display is set to Condensed, this will lead to a loss of information")
warning("Advised is to set display to Expanded before filtering the unique molecular.signature")
}
Object@Data.RR <- Object@Data.RR[!duplicated(canonical.df$Molecules),
!duplicated(canonical.df$Molecules)]
canonical.df <- canonical.df[!duplicated(canonical.df$Molecules),]
Object@metadata[,"mol.signature"] <- rep("Unique", times = nrow(metadata))
}
####################################################
#------------Kmeans of clusters--------------------#
####################################################
if(method == "kmeans")
{
message("Running kmeans")
if(length(clusters) > 1)
{
message("Too many clusters supplied, Taking first cluster")
clusters <- clusters[1]
}
canonical.df$cluster <- kmeans(x = Object@Data.RR, centers = clusters)$cluster
}
##############################################################
#------------Kmeans of clusters per group--------------------#
##############################################################
if(method == "kmeans_group")
{
print("Running kmeans per group seperatly")
if(!length(clusters) == nrow(Object@PData))
{
message("Wrong number of clusters supplied")
stop()
}
temp.cl2 <- vector()
for(i in 1:length(unique(as.character(Object@Data[[1]]$Groups))))
{
idx.ls <- Object@Data[[1]][as.character(Object@Data[[1]]$Groups) == as.character(Object@PData[i,"Groupnames"]),"RR_name"]
rr.temp <- Object@Data.RR[idx.ls,idx.ls]
temp.cl1 <- kmeans(x = rr.temp, centers = clusters[i])$cluster
if(i >1){temp.cl1 <- temp.cl1 + max(temp.cl2)}
temp.cl2 <- c(temp.cl2, temp.cl1)
}
canonical.df$cluster <- temp.cl2
}
#########################################################
#------------Hierarchical clustering--------------------#
#########################################################
if(method == "Hierarchical")
{
message("Running Hierarchical")
if(length(clusters) > 1){message("Too many clusters supplied, Taking first cluster")}
canonical.df$cluster <- cutree(hclust(dist(t(Object@Data.RR)), method = "ward.D2"), k = clusters)
}
#########################################################
#------------Hierarchical clustering--------------------#
#########################################################
if(method == "Hierarchical_group")
{
message("Running Hierarchical per group")
if(length(clusters) > 1){message("Too many clusters supplied, Taking first cluster")}
temp.cl2 <- vector()
for(i in 1:nrow(Object@PData))
{
idx.ls <- Object@Data[[1]][as.character(Object@Data[[1]]$Group) == as.character(Object@PData[i,"Groupnames"]),"RR_name"]
rr.temp <- Object@Data.RR[idx.ls,idx.ls]
temp.clx<- cutree(hclust(dist(t(rr.temp)), method = "ward.D2"), k = clusters)
temp.cl1 <- hclust(dist(t(rr.temp)), method = "ward.D2")$order
if(i >1){temp.cl1 <- temp.cl1 + max(temp.cl2)}
temp.cl2 <- c(temp.cl2, temp.cl1)
}
canonical.df$cluster <- temp.cl2
}
#########################################################
#------------User defined clustering--------------------#
#########################################################
if(method == "User_supplied")
{
message("Running User defined clustering function")
canonical.df$cluster <- user_function(Object@Data.RR)
}
############################################
#---------Adding cluster info--------------#
############################################
canonical.df$mean.RR.cl <- NA
canonical.df$sum.RR.cl <- NA
for(cl.i in unique(canonical.df$cluster))
{
DF.cl <- canonical.df[canonical.df$cluster == cl.i,]
idx <- DF.cl$RR_name
rr.x <- Object@Data.RR[idx, idx]
rr.x <- data.frame(sapply(rr.x, function(x) as.numeric(as.character(x))))
rr.x <- as.matrix(rr.x)
DF.cl$mean.RR.cl <- round(mean(rr.x),digits = 4)
DF.cl$sum.RR.cl <- round(sum(rr.x),digits = 4)
canonical.df[canonical.df$cluster == cl.i,] <- DF.cl
}
###############################
#--------Order clusters-------#
###############################
message("Ordering pathway clusters")
if(order == "group")
{
Object@Data <- list(canonical.df[order(canonical.df$Group,canonical.df$cluster, decreasing = F),])
Object@metadata[,"order.group"] <- rep(order, times = nrow(Object@metadata))
}
if(order == "cluster")
{
Object@Data <- list(canonical.df[order(canonical.df$cluster, decreasing = F),])
Object@metadata[,"order.group"] <- rep(order, times = nrow(Object@metadata))
}
if(order == "mean.RR")
{
Object@Data <- list(canonical.df[order(canonical.df$mean.RR.cl, decreasing = F),])
Object@metadata[,"order.group"] <- rep(order, times = nrow(Object@metadata))
}
if(order == "Sum.RR")
{
Object@Data <- list(canonical.df[order(canonical.df$sum.RR.cl, decreasing = F),])
Object@metadata[,"order.group"] <- rep(order, times = nrow(Object@metadata))
}
Object@metadata[,"cluster.method"] <- rep(method, times = nrow(Object@metadata))
Object@Data.RR <- Object@Data.RR[Object@Data[[1]]$RR_name,Object@Data[[1]]$RR_name]
##########################
#--------plot Info-------#
##########################
pal.c <- c(brewer.pal(n = 8, name ="Accent" ),
brewer.pal(n = 8, name ="Dark2" ),
brewer.pal(n = 8, name ="Set3"),
brewer.pal(n = 8, name ="Set1"))
if(Object@metadata$display[1] == "Expanded")
{
#Display is expanded meaning groups get marked seperatly
if(length(unique(as.character(Object@Data[[1]]$Type))) > 1)
{
aka2 <- matrix(data = NA,
nrow = nrow((Object@Data.RR)),
ncol = 3 +nrow(Object@metadata))
colnames(aka2) <- c("Group", "Cluster", "Type", unique(Object@metadata$Groups))
aka2[,"Type"] <- as.character(Object@Data[[1]]$Type)
pal.type <- pal.c[1:length(unique(aka2[,"Type"]))]
names(pal.type) <- unique(aka2[,"Type"])
for(groups.i in 1:nrow(Object@metadata))
{
aka2[,Object@metadata$Groups[groups.i]] <- Object@Data[[1]][,Object@metadata$Groups[groups.i]]
}
}else{
aka2 <- matrix(data = NA, nrow = nrow((Object@Data.RR)), ncol = 2+ nrow(Object@metadata))
colnames(aka2) <- c("Group", "Cluster", unique(Object@metadata$Groups))
for(groups.i in 1:nrow(Object@metadata))
{
aka2[,Object@metadata$Groups[groups.i]] <- Object@Data[[1]][,Object@metadata$Groups[groups.i]]
}
}
}else{
if(length(unique(as.character(Object@Data[[1]]$Type))) > 1)
{
aka2 <- matrix(data = NA, nrow = nrow((Object@Data.RR)), ncol = 3)
colnames(aka2) <- c("Group", "Cluster", "Type")
aka2[,"Type"] <- as.character(Object@Data[[1]]$Type)
pal.type <- pal.c[1:length(unique(aka2[,"Type"]))]
names(pal.type) <- unique(aka2[,"Type"])
}else{
aka2 <- matrix(data = NA, nrow = nrow((Object@Data.RR)), ncol = 2)
colnames(aka2) <- c("Group", "Cluster")
}
}
rownames(aka2) <- Object@Data[[1]]$RR_name
aka2[,"Group"] <- as.character(Object@Data[[1]]$Groups)
aka2[,"Cluster"] <- as.character(Object@Data[[1]]$cluster)
aka2 <- as.data.frame(aka2)
if(length(unique(Object@Data[[1]]$cluster)) > 32)
{
message("Warning: number of cluster is larger than colours supported")
}
groups.col <- brewer.pal(n = 8, name ="Set2" )[1:length(unique(aka2[,"Group"]))]
names(groups.col) <- unique(aka2[,"Group"])
pal <- pal.c[1:length(unique(aka2[,"Cluster"]))]
names(pal) <- unique(aka2[,"Cluster"])
if(Object@metadata$display[1] == "Expanded")
{
if(length(unique(as.character(Object@Data[[1]]$Type))) > 1)
{
aka3 = list(Group = groups.col,
Cluster= pal,
Type = pal.type)
for(groups.i in 1:nrow(Object@metadata))
{
vector.x <- c("black","red")
names(vector.x) <- c(0,1)
aka3[[Object@metadata$Groups[groups.i]]] <- vector.x
}
names(aka3) <- c("Group", "Cluster", "Type", unique(Object@metadata$Groups))
}else{
aka3 = list(Group = groups.col,
Cluster= pal)
for(groups.i in 1:nrow(Object@metadata))
{
vector.x <- c("black","red")
names(vector.x) <- c(0,1)
aka3[[Object@metadata$Groups[groups.i]]] <- vector.x
}
names(aka3) <- c("Group", "Cluster", unique(Object@metadata$Groups))
}
}else{
if(length(unique(as.character(Object@Data[[1]]$Type))) > 1)
{
aka3 = list(Group = groups.col,
Cluster= pal,
Type = pal.type)
names(aka3) <- c("Group", "Cluster", "Type")
}else{
aka3 = list(Group = groups.col,
Cluster= pal)
names(aka3) <- c("Group", "Cluster")
}
}
##################################
#-----------Return---------------#
##################################
Object@plot$aka2 = aka2
Object@plot$aka3 = aka3
message("-----------------------------------------------------------")
message("[<<<<< ClusterGeneSets END >>>>>>]")
message("[=========================================================]")
message("[You may want to process HighlightGeneSets next. ]")
message("[You may want to plot the results using PlotGeneSets next. ]")
return(Object)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.