#' Run WGCNA
block_with_parameter <- function(input_data, weight_power, minModuleSize=25,
reassignThreshold= 0, mergeCutHeight=0.15){
# This study will use unsigned network with Pearson correlation matrix
WGCNA::blockwiseModules(input_data, power = weight_power,
networkType = "unsigned",
minModuleSize = minModuleSize,
reassignThreshold = reassignThreshold,
mergeCutHeight = mergeCutHeight,
numericLabels = F,
pamRespectsDendro = F,
saveTOMs = F, saveTOMFileBase = "TOM",
verbose = 3)
}
plot_dendrogram <- function(net, directory){
if (!dir.exists (directory)){dir.create (directory)}
mergedColors <- net$colors
grDevices::png (paste (directory, 'dendrogram.png', sep='/'))
WGCNA::plotDendroAndColors(
net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05
)
dev.off()
}
#' Record genes of a particular module
write_module_gene <- function (datExpr, filename, net, save_dir, top_marks=NULL){
mergedColors <- net$colors
sink (paste (save_dir, filename, sep='/'))
for (color in unique(mergedColors)){
top_marker_genes <- colnames (datExpr)[net$colors == color]
if (is.null(top_marks)){
cat(paste ('#', color, '\n'))
cat(top_marker_genes)
cat ("\n-------------------------------------------------- \n")
}else{
cat (paste (color, ":"))
cat (as.character(top_marks[top_marks %in% top_marker_genes]))
cat ('\n')
}
}
sink()
}
wgcna_plot_save <- function (input_data, weight_power, show_markers, save_dir){
net <- block_with_parameter(input_data, weight_power)
plot_dendrogram (net, save_dir)
write_module_gene (input_data, 'module_gene.txt', net, save_dir)
write_module_gene (input_data, 'module_top_gene.txt', net, save_dir, top_marks = show_markers)
}
#' Select Seurat object by cell type
#'
#' @param x a Seurat object
#' @param top_markers should be a dataframe generated by the Seurat FindMarkers
#' gene. Otherwise, the top variable features will be selected
#' @param cluster_num if it is 'all', then all cells will be selected
#' @importFrom Seurat VariableFeatures
subset_celltype <- function (x, top_markers=NULL, cluster_num='EVT',
select_type='seurat_clusters', assay='RNA',
slot_data='data'){
if (is.null(top_markers)){var_genes <- VariableFeatures(x)
}else if (top_markers == 'all'){var_genes <- rownames (x)
}else{var_genes <- unique(top_markers$gene)}
datExpr <- t (as.matrix(Seurat::GetAssayData (x, assay=assay, slot=slot_data)
) [var_genes,]) # only use var.genes in analysis
if (cluster_num != 'all'){
datExpr <- datExpr [x@meta.data[, select_type]== cluster_num, ]
}
return (datExpr)
}
wgcna_celltype_plot <- function (dataset, top_markers, cluster_num,
weight_power, celltype, wgcna_dir,
select_type, num_gene_show=5){
save_dir <- paste (wgcna_dir, celltype, sep='/')
datExpr <- subset_celltype (dataset, top_markers, cluster_num, select_type)
cluster <- top_markers[top_markers$cluster == cluster_num, ]
celltype_gene <- cluster %>% dplyr::top_n (n = num_gene_show, wt = avg_logFC)
wgcna_plot_save (datExpr, weight_power, celltype_gene$gene, save_dir)
}
# ========================= Determine Weight Power=========================
determine_power <- function(datExpr, save_file){
powers = c(c(1:10), seq(from = 12, to=20, by=2))
# Call the network topology analysis function
sft = WGCNA::pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
grDevices::png (save_file, width=580, height=360)
# Plot the results:
#sizeGrWindow(9, 5)
graphics::par(mfrow = c(1,2))
cex1 = 0.9
# Scale-free topology fit index as a function of the soft-thresholding power
graphics::plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"));
graphics::text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red");
# this line corresponds to using an R^2 cut-off of h
graphics::abline(h=0.90,col="red")
# Mean connectivity as a function of the soft-thresholding power
graphics::plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
graphics::text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
grDevices::dev.off()
return (sft$fitIndices)
}
determine_power_auto <- function (datExpr, save_dir){
if (!dir.exists (save_dir)){dir.create (save_dir)}
power_trend<- determine_power (datExpr, paste (save_dir, 'choose_power.png', sep='/'))
r_sq <- power_trend$SFT.R.sq
if (max(r_sq) >=0.9){
weight_power<- min(power_trend$Power [r_sq >= 0.9])
}else{
weight_power<- power_trend$Power [which (r_sq == max(r_sq))]
}
return (weight_power)
}
# =========================VISUALISATION =========================
visualise_module <- function (datExpr, TOM, weight_power, net, module, celltype,
save_dir, sim_mat, nTop=30){
colnames(TOM) <- colnames(datExpr)
rownames(TOM) <- colnames(datExpr)
if (module != 'all'){
inModule <- (net$colors==module)
}else{inModule <- rep(TRUE, length(net$colors))}
# Select the corresponding Topological Overlap
modTOM <- TOM[inModule, inModule]
IMConn <- WGCNA::softConnectivity(datExpr[, inModule])
top <- (rank(-IMConn) <= nTop)
# plotting
plot_mat<- modTOM*sign(sim_mat [inModule, inModule])
plot_mat <- network::network(plot_mat[top, top],
matrix.type = "adjacency",
ignore.eval = FALSE,
names.eval = "weights")
plot_color <- data.table::fifelse(plot_mat %e% "weights"> 0., "red", "blue")
plot_weight <- abs(plot_mat %e% "weights")
ggnet2::ggnet2 (plot_mat, label=TRUE, alpha=0.35, edge.color=plot_color, edge.size=plot_weight)
ggplot2::ggsave (paste (save_dir, paste (module, '_module.png', sep=''), sep='/'))
}
visualise_all_modules <- function (datExpr, weight_power, celltype, save_dir, nTop=30){
save_dir <- paste(save_dir, celltype, sep='/')
if (!dir.exists (save_dir)){dir.create (save_dir)}
# determine weight power
weight_power <- determine_power_auto (datExpr, save_dir)
# document
sink (paste (save_dir, 'weight_power.txt', sep='/'))
cat (paste('The weight power used to calculate the adjacency matrix in this experiment is', weight_power))
cat (paste ('\n', Sys.time()))
sink()
sim_mat <- WGCNA::cor (datExpr, method="pearson")
adj_mat <- adjacency.fromSimilarity (sim_mat,power=weight_power)
TOM <- TOMsimilarity(adj_mat)
# Recalculate topological overlap
net <- block_with_parameter (datExpr, weight_power)
all_modules <- c(unique (net$colors), 'all')
for (module in all_modules){
print (paste('Making graph for', module, 'module'))
visualise_module (datExpr, TOM, weight_power, net, module, celltype,
save_dir, sim_mat)
}
}
WGCNA_network_data <- function (datExpr, weight_power, save_hist,
show_weight_threshold=0.5,
show_weight_percentile=0.9){
datExpr_noribo <- remove_ribosomal_genes (datExpr, row_is_gene=FALSE)
print (dim(datExpr_noribo))
sim_mat <- WGCNA::cor (datExpr_noribo, method="pearson")
adj_mat <- WGCNA::adjacency.fromSimilarity (sim_mat,power=weight_power)
TOM <- WGCNA::TOMsimilarity(adj_mat)
#con_mat <- as.data.frame(TOM*sign(sim_mat))
con_mat <- as.data.frame(sim_mat)
rownames (con_mat) <- colnames (datExpr_noribo)
colnames (con_mat) <- colnames (datExpr_noribo)
sim_red <- NU$graph_embedding (datExpr_noribo)
net <- block_with_parameter (datExpr_noribo, weight_power = weight_power)
con_mat_final <- NU$prepare_network_plot (con_mat, datExpr_noribo,
net$colors, save_hist,
sim_red,
show_weight_threshold,
show_weight_percentile)
all_cluster <- length (unique (con_mat_final$cluster))
cluster_level <- paste ( 'cluster', 1:all_cluster, sep='_')
con_mat_final$cluster <- cluster_level [factor (con_mat_final$cluster) ]
return (con_mat_final)
}
WGCNA_all <- function (x, top_markers, wgcna_dir, celltype, feature='reassign',
weight_thresh=NULL, weight_perc=0.999, min_express=1,
return_con_mat=F){
datExpr <- subset_celltype (x, top_markers, celltype, feature)
datExpr <- datExpr [, colMeans (datExpr) > min_express]
print ('determining power')
weight_power <- determine_power_auto (datExpr, paste (wgcna_dir, celltype, sep='/'))
print ('running WGCNA')
con_mat_final <- WGCNA_network_data (datExpr, weight_power=weight_power,
save_hist=paste (wgcna_dir, celltype, 'weight_hist.png', sep='/'),
show_weight_threshold=weight_thresh,
show_weight_percentile=weight_perc)
print ('plotting network')
NU$plot_network (con_mat_final, paste (wgcna_dir, celltype, 'corr_pca.png', sep='/'))
if (return_con_mat ) {return (con_mat_final)}
}
# ------------------
# Eigengene analysis
# ------------------
#' Find eigengene of WGCNA modules
#'
#' @description Calculate the WGCNA connectivity matrix, perform hierarchical
#' clustering, cut tree, and then eigengene values. Weight power is
#' automatically determined
#' @param x a Seurat object
#' @param save_dir where to store the genes of each WGCNA cluster
#' @param cluster_num on which cell cluster WGCNA is performed. 'all' means the
#' entire dataset is involved
#' @param select_type where the cluster information is stored.
#' @param ... other parameters to pass to `block_with_parameters` which
#' corresponds to `WGCNA::blockwiseModules`
#' @export
find_eigengene <- function (x, save_dir, cluster_num='all',
select_type='revised', save_eigen=T,
return_eigengene=F, top_markers=NULL, ...){
save_dir <- paste (save_dir, 'WGCNA', sep='/')
if (!dir.exists (save_dir)){dir.create (save_dir) }
datExpr <- subset_celltype(x, cluster_num=cluster_num,
top_markers=top_markers, select_type=select_type)
print ('determining the power for scale-free network')
weight_power <- determine_power_auto (datExpr, save_dir)
#datExpr_noribo <- remove_ribosomal_genes (datExpr, row_is_gene=FALSE)
net <- block_with_parameter (datExpr, weight_power, ...)
print ('saving gene clusters')
if (save_eigen) {save_eigengene (net, save_dir)}
if (return_eigengene){
print ('calculating eigengenes')
eig_genes <- WGCNA::moduleEigengenes (datExpr, net$colors)
datME<- eig_genes$eigengene
return (datME)
}
}
make_eigen_heatmap <- function (x, datME, group.by, ...){
heat_net <- Seurat::CreateSeuratObject (t(as.matrix (datME)), meta.data=x@meta.data)
color_row <- colnames (datME)
seurat_heat (heat_net, group.by, color_row, slot_data = 'counts', ...)
}
save_eigengene <- function (net, wgcna_dir) {
color_list <- list ()
all_colors <- unique (net$colors)
for (i in 1:length (all_colors) ){
color_list [[i]] <- names(net$colors [net$colors == all_colors [i] ])
}
max_genes <- max ( unlist(lapply (color_list, length) ))
color_df <- matrix ("", max_genes, length (all_colors))
for (i in 1:length (all_colors) ){
color_df [1:length (color_list[[i]]) ,i] <- color_list [[i]]
}
new_colors <- colors2labels (all_colors, prefix='GC')
colnames (color_df) <- new_colors
color_vec <- new_colors [match (net$colors, all_colors) ]
print (class (color_vec))
print (dim (color_vec))
names (color_vec) <- names (net$colors)
utils::write.csv (color_df, paste (wgcna_dir, 'module_genes.csv', sep='/') )
utils::write.csv (color_vec, paste (wgcna_dir, 'module_net_genes.csv', sep='/') )
}
#' Convert WGCNA generated names to cluster names
#'
#' @export
colors2labels <- function (color_vec, prefix='cluster'){
color_vec %>% as.factor () %>% as.numeric () -> num_vec
num_vec <- paste (prefix, num_vec, sep='')
return (gtools::mixedsort (num_vec))
}
#' Calculate intra modular connectivity
#'
#' @param x a Seurat object
#' @param ... parameters for `subset_celltype`
#' @importFrom magrittr %>%
#' @export
hub_score <- function (x, weight_power, save_dir, ...){
datExpr <- subset_celltype(x, cluster_num='all', top_markers='all',
select_type='revised', ...)
# calculate the adjacency matrix
ADJ <- abs (WGCNA::cor (datExpr, use='p'))^weight_power
# sorting out the names
color_net <- utils::read.csv (paste (save_dir,
'WGCNA/module_net_genes.csv', sep='/'))
color_net %>% tibble::deframe () -> color_net
if (length(color_net) > nrow (ADJ)){
print ('removing genes not in the adjacent matrix')
color_net <- color_net [match (rownames (ADJ), names (color_net))]
}
intra_net <- WGCNA::intramodularConnectivity(ADJ, color_net)
intra_net %>% tibble::add_column (cluster=color_net) %>%
tibble::add_column (gene=rownames (intra_net))
}
#' Save the hub score data frame
#'
#' @param markers a dataframe generated from `find_DE_genes`
#' @param hub_df a dataframe generated from `hub_score`
#' @importFrom magrittr %>%
#' @export
save_hub_score <- function (markers, hub_df, clusters, cell_types, save_dir,
other_celltypes=NULL, sel_columns=NULL){
all_df <- list()
if (length (cell_types)==1){cell_types <- rep (cell_types, length (clusters))}
for (i in 1:length(clusters)){
sel_hub <- hub_df %>% dplyr::filter (cluster == clusters[i])
markers %>% dplyr::filter (group %in% c(cell_types[i])) -> sel_mark
sel_mark <- sel_mark [match (sel_hub$gene, sel_mark$feature), ]
all_df [[i]] <- cbind (sel_hub, sel_mark)
}
final_dir <- paste (save_dir, paste ('TF_', cell_types[1], '.csv', sep=''), sep='/')
do.call (rbind, all_df) %>% dplyr::arrange (dplyr::desc (kWithin) ) -> save_csv
if (!is.null (sel_columns)){ save_csv %>% dplyr::select (dplyr::all_of (sel_columns)) -> save_csv }
utils::write.csv (save_csv, final_dir, quote=F, row.names=F)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.