#' Construct WGCNA modules and associated data
#'
#' Make WGCNA modules from gene expression data. Also outputs mean or eigenvalue module expression and DAVID formatted gene lists
#'
#' @param fit List object output by fit_modules( )
#' @param Rsq_min Numeric minimum R-squared for soft threshold selection. If set, sft_value is ignored
#' @param sft_value Numeric soft threshold. Set when minimum R-squared is ignored
#' @param minModuleSize Numeric minimum module size. Default is 20
#' @param maxBlockSize Integer giving maximum block size for module detection. Default is 500
#' @param deepSplit Integer value between 0 and 4. Provides a simplified control over how sensitive module detection should be to module splitting, with 0 least and 4 most sensitive
#' @param networkType Character string. Network type. One of "unsigned", "signed", "signed hybrid". Default is "signed"
#' @param TOMType Character string. One of "none", "unsigned", "signed", "signed Nowick", "unsigned 2", "signed 2", or "signed Nowick 2". If "none", adjacency will be used for clustering. See TOMsimilarityFromExpr for details. Default is "signed"
#' @param mods_mean Logical if include mean module expression in output
#' @param mods_eigen Logical if include eigenvalue module expression in output
#' @param david Logical if include DAVID formatted genes in modules in output
#' @param nThread Integer for number of threads to use
#'
#' @param Rsq.min Deprecated form of Rsq_min
#' @param sft.value Deprecated form of sft_value
#' @param mods.mean Deprecated form of mods_mean
#' @param mods.eigen Deprecated form of mods_eigen
#'
#' \itemize{
#' \item{genes} Character vector of genes used in module building
#' \item{sft} Data frame with soft thresholding selected for module building. Includes power, minimum R-squared, and connectivity
#' \item{top.plot} ggplot object of soft thresholding topology
#' \item{connect.plot} ggplot object of soft thresholding connectivity
#' \item{mods} Data frame of genes in modules
#' \item{mods.mean} Data frame of mean module expression for each library
#' \item{mods.eigen} Data frame of module eigenvalue expression for each library
#' \item{david} DAVID formatted data frame of genes in modules
#' }
#' @export
#'
#' @examples
#' fit <- fit_modules(dat = example.voom)
#' dat.mods <- make_modules(fit = fit, sft_value = 14,
#' mods_mean = TRUE, mods_eigen = TRUE, david = TRUE)
make_modules <- function(fit,
Rsq_min = NULL, sft_value = NULL,
minModuleSize = 20, maxBlockSize=500, deepSplit = 3,
networkType="signed", TOMType="signed",
mods_mean = FALSE, mods_eigen = FALSE, david = FALSE,
nThread=2,
Rsq.min = NULL, sft.value = NULL,
mods.mean = FALSE, mods.eigen = FALSE){
SFT.R.sq <- Power <- module <- geneName <- module.char <- NULL
#Back compatibility
if(!is.null(Rsq.min)){Rsq_min <- Rsq.min}
if(!is.null(sft.value)){sft_value <- sft.value}
if(mods.mean){mods_mean <- mods.mean}
if(mods.eigen){mods_eigen <- mods.eigen}
##### Soft-thresholding #####
#Select threshold
if(!is.null(Rsq_min)){
sft.select <- dplyr::filter(fit$sft, SFT.R.sq >= Rsq_min)
if(nrow(sft.select)>0){
power.t <- min(sft.select$Power)
} else {
stop("R-squared minimum not reached. Please input lower Rsq_min or set sft_value instead.")
}
} else if(!is.null(sft_value)){
sft.select <- dplyr::filter(fit$sft, Power == sft_value)
power.t <- unique(sft.select$Power)
} else{
stop("Please set Rsq_min or sft_value.")
}
##### Build modules #####
cor <- WGCNA::cor #Reassign cor fxn to prevent namespace error with stats
mod.net <- WGCNA::blockwiseModules(t(fit$dat$E),
power=power.t,
networkType=networkType,
TOMType=TOMType,
maxBlockSize=maxBlockSize,
minModuleSize=minModuleSize,
deepSplit=deepSplit,
numericLabels=TRUE,
saveTOMFileBase="TOM-blockwise",
nthreads=nThread)
cor <- stats::cor #Revert cor fxn assignment
mods <- as.data.frame(mod.net$colors) %>%
tibble::rownames_to_column("geneName") %>%
dplyr::rename(module = "mod.net$colors") %>%
#add leading 0 to module names for correct sorting of factor
dplyr::mutate(module.char = ifelse(module <= 9,
paste("0", module, sep=""),
module)) %>%
#Add color var
dplyr::mutate(mod.color = WGCNA::labels2colors(mod.net$colors))
#Add gene info if present
if(!is.null(fit$dat$genes)){
mods <- mods %>%
dplyr::left_join(fit$dat$genes, by="geneName")
}
##### Combine results into list #####
dat.mods <- list()
dat.mods[["genes"]] <- fit$genes
dat.mods[["mods"]] <- mods
dat.mods[["sft"]] <- sft.select
##### plot #####
dat.mods[["top.plot"]] <- fit$top.plot +
ggplot2::geom_hline(ggplot2::aes(yintercept = sft.select$SFT.R.sq[1]), color="red") +
ggplot2::geom_vline(xintercept = power.t, color="red")
dat.mods[["connect.plot"]] <- fit$connect.plot +
ggplot2::geom_hline(ggplot2::aes(yintercept = sft.select$mean.k.[1]), color="red") +
ggplot2::geom_vline(xintercept = power.t, color="red")
##### Mean module expression #####
if(mods_mean){
mods.voom <- mods %>%
#Combine count and module data
dplyr::select(geneName, module.char) %>%
dplyr::left_join(tibble::rownames_to_column(as.data.frame(fit$dat$E), "geneName"),
by="geneName") %>%
#Calculate mean by module
dplyr::group_by(module.char) %>%
dplyr::summarise_if(is.numeric, mean, na.rm = TRUE) %>%
tibble::rownames_to_column() %>%
#Make module names
dplyr::mutate(rowname=paste("module", module.char, sep="_")) %>%
tibble::column_to_rownames()
dat.mods[["mods.mean"]] <- mods.voom
}
##### Eigenvalue expression #####
if(mods_eigen){
mods.E <- mod.net$MEs %>%
t() %>% as.data.frame() %>%
tibble::rownames_to_column("module.char") %>%
dplyr::mutate(module.char = as.numeric(gsub("ME","", module.char))) %>%
dplyr::mutate(module.char = ifelse(module.char <= 9,
paste("0", module.char, sep=""),
module.char)) %>%
dplyr::arrange(module.char)
rownames(mods.E) <- paste0("module_", mods.E$module.char)
dat.mods[["mods.eigen"]] <- mods.E
}
##### DAVID format #####
if(david){
#list modules
mod.names <- sort(rownames(mods.voom))
#calculate maximum module gene list length
max.mod <- max(table(mod.net$colors))
#Create data frame with that number of rows
david.df <- data.frame(rowname=1:max.mod)
for (i in 1:length(mod.names)){
#Create module name as "module#"
mod.name <- mod.names[i]
#Filter gene list to module of interest
gene.list <- mods %>%
dplyr::filter(module == i-1) %>%
dplyr::select(geneName)
#Calculate the total number of rows that need to be added for nrows to match
add.genes <- max.mod - nrow(gene.list)
#Add NAs to fill out gene.list and convert to data frame for merging
gene.list <- c(gene.list$geneName, rep(NA, times=add.genes))
gene.list <- as.data.frame(gene.list)
colnames(gene.list) <- mod.name
#Combine module lists and rename to mod.name
david.df <- david.df %>%
dplyr::bind_cols(gene.list)
}
dat.mods[["david"]] <- david.df
}
return(dat.mods)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.