R/WGCNA_utils.R

Defines functions determine_power plot_dendrogram block_with_parameter

Documented in block_with_parameter

#' Run WGCNA
block_with_parameter <- function(input_data, weight_power, minModuleSize=25,
                                 reassignThreshold= 0, mergeCutHeight=0.15){
        # This study will use unsigned network with Pearson correlation matrix
        WGCNA::blockwiseModules(input_data, power = weight_power,
                  networkType = "unsigned", 
                  minModuleSize = minModuleSize,
                  reassignThreshold = reassignThreshold, 
                  mergeCutHeight = mergeCutHeight,
                  numericLabels = F, 
                  pamRespectsDendro = F,
                  saveTOMs = F, saveTOMFileBase = "TOM", 
                  verbose = 3)
}

plot_dendrogram <- function(net, directory){
        if (!dir.exists (directory)){dir.create (directory)}
        mergedColors <- net$colors
        grDevices::png (paste (directory, 'dendrogram.png', sep='/'))
        WGCNA::plotDendroAndColors(
                net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
                "Module colors",
                dendroLabels = FALSE, hang = 0.03,
                addGuide = TRUE, guideHang = 0.05
        )
        dev.off()
}

#' Record genes of a particular module
write_module_gene <- function (datExpr, filename, net, save_dir, top_marks=NULL){
        mergedColors <- net$colors
        sink (paste (save_dir, filename, sep='/'))
                for (color in unique(mergedColors)){
                        top_marker_genes <- colnames (datExpr)[net$colors == color]
                        if (is.null(top_marks)){
                                cat(paste ('#', color, '\n'))
                                cat(top_marker_genes)
                                cat ("\n-------------------------------------------------- \n")
                        }else{
                                cat (paste (color, ":"))
                                cat (as.character(top_marks[top_marks %in% top_marker_genes]))
                                cat ('\n')
                        }

                }
        sink()
}

wgcna_plot_save <- function (input_data, weight_power, show_markers, save_dir){
        net <- block_with_parameter(input_data, weight_power)
        plot_dendrogram (net, save_dir)
        write_module_gene (input_data, 'module_gene.txt', net, save_dir)
        write_module_gene (input_data, 'module_top_gene.txt', net, save_dir, top_marks = show_markers)
}

#' Select Seurat object by cell type
#'
#' @param x a Seurat object
#' @param top_markers should be a dataframe generated by the Seurat FindMarkers
#' gene. Otherwise, the top variable features will be selected
#' @param cluster_num if it is 'all', then all cells will be selected
#' @importFrom Seurat VariableFeatures
subset_celltype <- function (x, top_markers=NULL, cluster_num='EVT',
                             select_type='seurat_clusters', assay='RNA',
                             slot_data='data'){
        if (is.null(top_markers)){var_genes <- VariableFeatures(x)
        }else if (top_markers == 'all'){var_genes <- rownames (x)
        }else{var_genes <- unique(top_markers$gene)}
        datExpr <- t (as.matrix(Seurat::GetAssayData (x, assay=assay, slot=slot_data)
                                ) [var_genes,])  # only use var.genes in analysis 
        if (cluster_num != 'all'){
                datExpr <- datExpr [x@meta.data[, select_type]== cluster_num, ]
        }
        return (datExpr)
}

wgcna_celltype_plot <- function (dataset, top_markers, cluster_num,
                                 weight_power, celltype, wgcna_dir,
                                 select_type, num_gene_show=5){
        save_dir <- paste (wgcna_dir, celltype, sep='/')
        datExpr <- subset_celltype (dataset, top_markers, cluster_num, select_type)
        cluster <- top_markers[top_markers$cluster == cluster_num, ]
        celltype_gene <- cluster %>% dplyr::top_n (n = num_gene_show, wt = avg_logFC)
        wgcna_plot_save (datExpr, weight_power, celltype_gene$gene, save_dir)
}


# ========================= Determine Weight Power=========================
determine_power <- function(datExpr, save_file){
        powers = c(c(1:10), seq(from = 12, to=20, by=2))
        # Call the network topology analysis function
        sft = WGCNA::pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)

        grDevices::png (save_file, width=580, height=360)
        # Plot the results:
        #sizeGrWindow(9, 5)
        graphics::par(mfrow = c(1,2))
        cex1 = 0.9
        # Scale-free topology fit index as a function of the soft-thresholding power
        graphics::plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
        xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
        main = paste("Scale independence"));
        graphics::text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
        labels=powers,cex=cex1,col="red");
        # this line corresponds to using an R^2 cut-off of h
        graphics::abline(h=0.90,col="red")

        # Mean connectivity as a function of the soft-thresholding power
        graphics::plot(sft$fitIndices[,1], sft$fitIndices[,5],
        xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
        main = paste("Mean connectivity"))
        graphics::text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
        grDevices::dev.off()
        return (sft$fitIndices)
}

determine_power_auto <- function (datExpr, save_dir){
        if (!dir.exists (save_dir)){dir.create (save_dir)}
        power_trend<- determine_power (datExpr, paste (save_dir, 'choose_power.png', sep='/'))
        r_sq <- power_trend$SFT.R.sq
        if (max(r_sq) >=0.9){
                weight_power<- min(power_trend$Power [r_sq >= 0.9])
        }else{
                weight_power<- power_trend$Power [which (r_sq == max(r_sq))]
        }
        return (weight_power)
}

# =========================VISUALISATION =========================
visualise_module <- function (datExpr, TOM, weight_power, net, module, celltype,
                              save_dir, sim_mat, nTop=30){
        colnames(TOM) <-  colnames(datExpr)
        rownames(TOM) <-  colnames(datExpr)
        if (module != 'all'){
                inModule <- (net$colors==module)
        }else{inModule <- rep(TRUE, length(net$colors))}

        # Select the corresponding Topological Overlap
        modTOM <- TOM[inModule, inModule]
        IMConn <- WGCNA::softConnectivity(datExpr[, inModule])
        top <- (rank(-IMConn) <= nTop)

        # plotting
        plot_mat<- modTOM*sign(sim_mat [inModule, inModule])
        plot_mat <- network::network(plot_mat[top, top],
                      matrix.type = "adjacency",
                      ignore.eval = FALSE,
                      names.eval = "weights")
        plot_color <- data.table::fifelse(plot_mat %e% "weights"> 0., "red", "blue")
        plot_weight <- abs(plot_mat %e% "weights")
        ggnet2::ggnet2 (plot_mat, label=TRUE, alpha=0.35, edge.color=plot_color, edge.size=plot_weight)
        ggplot2::ggsave (paste (save_dir, paste (module, '_module.png', sep=''), sep='/'))
}

visualise_all_modules <- function (datExpr, weight_power, celltype, save_dir, nTop=30){
        save_dir <- paste(save_dir, celltype, sep='/')
        if (!dir.exists (save_dir)){dir.create (save_dir)}

        # determine weight power
        weight_power <- determine_power_auto (datExpr, save_dir)

        # document
        sink (paste (save_dir, 'weight_power.txt', sep='/'))
        cat (paste('The weight power used to calculate the adjacency matrix in this experiment is', weight_power))
        cat (paste ('\n', Sys.time()))
        sink()

        sim_mat <- WGCNA::cor (datExpr, method="pearson")
        adj_mat <- adjacency.fromSimilarity (sim_mat,power=weight_power)
        TOM <- TOMsimilarity(adj_mat)

        # Recalculate topological overlap
        net <- block_with_parameter (datExpr, weight_power)
        all_modules <- c(unique (net$colors), 'all')
        for (module in all_modules){
                print (paste('Making graph for', module, 'module'))
                visualise_module (datExpr, TOM, weight_power, net, module, celltype,
                                  save_dir, sim_mat)
        }
}

WGCNA_network_data <- function (datExpr, weight_power, save_hist,
                                show_weight_threshold=0.5,
                                show_weight_percentile=0.9){
        datExpr_noribo <- remove_ribosomal_genes (datExpr, row_is_gene=FALSE)
        print (dim(datExpr_noribo))
        sim_mat <- WGCNA::cor (datExpr_noribo, method="pearson")
        adj_mat <- WGCNA::adjacency.fromSimilarity (sim_mat,power=weight_power)
        TOM <- WGCNA::TOMsimilarity(adj_mat)

        #con_mat <- as.data.frame(TOM*sign(sim_mat))
        con_mat <- as.data.frame(sim_mat)
        rownames (con_mat) <- colnames (datExpr_noribo)
        colnames (con_mat) <- colnames (datExpr_noribo)

        sim_red <- NU$graph_embedding (datExpr_noribo)
        net <- block_with_parameter (datExpr_noribo, weight_power = weight_power)
        con_mat_final <- NU$prepare_network_plot (con_mat, datExpr_noribo,
                                                  net$colors, save_hist,
                                                  sim_red,
                                                  show_weight_threshold,
                                                  show_weight_percentile)
        all_cluster <- length (unique (con_mat_final$cluster))
        cluster_level <- paste ( 'cluster', 1:all_cluster, sep='_')
        con_mat_final$cluster <- cluster_level [factor (con_mat_final$cluster) ]
        return (con_mat_final)
}

WGCNA_all <- function (x, top_markers, wgcna_dir, celltype, feature='reassign',
                       weight_thresh=NULL, weight_perc=0.999, min_express=1,
                       return_con_mat=F){
        datExpr <- subset_celltype (x, top_markers, celltype, feature)
        datExpr <- datExpr [, colMeans (datExpr) > min_express]
        print ('determining power')
        weight_power <- determine_power_auto (datExpr, paste (wgcna_dir, celltype, sep='/'))

        print ('running WGCNA')
        con_mat_final <- WGCNA_network_data (datExpr, weight_power=weight_power,
                                             save_hist=paste (wgcna_dir, celltype, 'weight_hist.png', sep='/'),
                                             show_weight_threshold=weight_thresh,
                                             show_weight_percentile=weight_perc)

        print ('plotting network')
        NU$plot_network (con_mat_final, paste (wgcna_dir, celltype, 'corr_pca.png', sep='/'))
        if (return_con_mat ) {return (con_mat_final)}
}

# ------------------
# Eigengene analysis
# ------------------

#' Find eigengene of WGCNA modules
#'
#' @description Calculate the WGCNA connectivity matrix, perform hierarchical
#' clustering, cut tree, and then eigengene values. Weight power is
#' automatically determined
#' @param x a Seurat object
#' @param save_dir where to store the genes of each WGCNA cluster
#' @param cluster_num on which cell cluster WGCNA is performed. 'all' means the
#' entire dataset is involved
#' @param select_type where the cluster information is stored.
#' @param ... other parameters to pass to `block_with_parameters` which
#' corresponds to `WGCNA::blockwiseModules`
#' @export
find_eigengene <- function (x, save_dir, cluster_num='all',
                            select_type='revised', save_eigen=T,
                            return_eigengene=F, top_markers=NULL, ...){
        save_dir <- paste (save_dir, 'WGCNA', sep='/')
        if (!dir.exists (save_dir)){dir.create (save_dir) }
        datExpr <- subset_celltype(x, cluster_num=cluster_num, 
                                   top_markers=top_markers, select_type=select_type)
        print ('determining the power for scale-free network')
        weight_power <- determine_power_auto (datExpr, save_dir)
        #datExpr_noribo <- remove_ribosomal_genes (datExpr, row_is_gene=FALSE)
        net <- block_with_parameter (datExpr, weight_power, ...)
        print ('saving gene clusters')
        if (save_eigen) {save_eigengene (net, save_dir)}
        if (return_eigengene){
                print ('calculating eigengenes')
                eig_genes <- WGCNA::moduleEigengenes (datExpr, net$colors)
                datME<- eig_genes$eigengene
                return (datME)
        }
}

make_eigen_heatmap <- function (x, datME, group.by, ...){
        heat_net <- Seurat::CreateSeuratObject (t(as.matrix (datME)), meta.data=x@meta.data)
        color_row <- colnames (datME)
        seurat_heat (heat_net, group.by, color_row, slot_data = 'counts', ...)
}

save_eigengene <- function (net, wgcna_dir) {
        color_list <- list ()
        all_colors <- unique (net$colors)
        for (i in 1:length (all_colors) ){
                color_list [[i]] <- names(net$colors [net$colors == all_colors [i] ])
        }
        max_genes <- max ( unlist(lapply (color_list, length) )) 
        color_df <- matrix ("",  max_genes, length (all_colors))

        for (i in 1:length (all_colors) ){
                color_df [1:length (color_list[[i]]) ,i] <- color_list [[i]]
        }

        new_colors <- colors2labels  (all_colors, prefix='GC')
        colnames (color_df) <- new_colors
        color_vec <- new_colors [match (net$colors, all_colors) ]
        print (class (color_vec))
        print (dim (color_vec))
        names (color_vec) <- names (net$colors)
        utils::write.csv (color_df, paste (wgcna_dir, 'module_genes.csv', sep='/') )
        utils::write.csv (color_vec, paste (wgcna_dir, 'module_net_genes.csv', sep='/') )
}

#' Convert WGCNA generated names to cluster names
#'
#' @export
colors2labels <- function (color_vec, prefix='cluster'){
        color_vec %>% as.factor () %>% as.numeric () -> num_vec
        num_vec <- paste (prefix, num_vec, sep='')
        return (gtools::mixedsort (num_vec))
}

#' Calculate intra modular connectivity
#' 
#' @param x a Seurat object
#' @param ... parameters for `subset_celltype`
#' @importFrom magrittr %>%
#' @export
hub_score <- function (x, weight_power, save_dir, ...){
        datExpr <- subset_celltype(x, cluster_num='all', top_markers='all',
                                   select_type='revised', ...)
        # calculate the adjacency matrix
        ADJ <- abs (WGCNA::cor (datExpr, use='p'))^weight_power

        # sorting out the names
        color_net <- utils::read.csv (paste (save_dir,
                        'WGCNA/module_net_genes.csv', sep='/'))
        color_net %>% tibble::deframe () -> color_net

        if (length(color_net) > nrow (ADJ)){
                print ('removing genes not in the adjacent matrix')
                color_net <- color_net [match (rownames (ADJ), names (color_net))]
        }

        intra_net <- WGCNA::intramodularConnectivity(ADJ, color_net)
        intra_net %>% tibble::add_column (cluster=color_net) %>% 
                tibble::add_column (gene=rownames (intra_net))
}

#' Save the hub score data frame
#'
#' @param markers a dataframe generated from `find_DE_genes`
#' @param hub_df a dataframe generated from `hub_score`
#' @importFrom magrittr %>%
#' @export
save_hub_score <- function (markers, hub_df, clusters, cell_types, save_dir,
                            other_celltypes=NULL, sel_columns=NULL){
        all_df <- list()
        if (length (cell_types)==1){cell_types <- rep (cell_types, length (clusters))}
        for (i in 1:length(clusters)){
                sel_hub <- hub_df %>% dplyr::filter (cluster == clusters[i])
                markers %>% dplyr::filter (group %in% c(cell_types[i])) -> sel_mark 
                sel_mark <- sel_mark [match (sel_hub$gene, sel_mark$feature), ]
                all_df [[i]] <- cbind (sel_hub, sel_mark)
        }
        final_dir <- paste (save_dir, paste ('TF_', cell_types[1], '.csv', sep=''), sep='/')
        do.call (rbind, all_df) %>% dplyr::arrange (dplyr::desc (kWithin) ) -> save_csv
        if (!is.null (sel_columns)){ save_csv %>% dplyr::select (dplyr::all_of (sel_columns)) -> save_csv }
        utils::write.csv (save_csv, final_dir, quote=F, row.names=F)
}
Yutong441/TBdev documentation built on Dec. 18, 2021, 8:22 p.m.