R/scMappR_and_pathway_analysis.R

Defines functions scMappR_and_pathway_analysis

Documented in scMappR_and_pathway_analysis

#' Generate cellWeighted_Foldchange, visualize, and enrich.
#' 
#' This function generates cell weighted Fold-changes (cellWeighted_Foldchange), visualizes them in a heatmap, and completes pathway enrichment of cellWeighted_Foldchanges and bulk gene list.
#'
#' This function generates cellWeighted_Foldchanges for every cell-type (see deconvolute_and_contextualize), as well as the relative cell-type proportions (which will be reutrned and pushed through).
#' Then, it generates heatmaps of all cellWeighted_Foldchanges, cellWeighted_Foldchanges overlapping with the signature matrix, the entire signature matrix, the cell-type preference values from the signature matrix that overlap with inputted differentially expressed genes.
#' Then, if you have Wifi, it will complete gProfileR of the reordered cellWeighted_Foldchanges as well as a the ordered list of genes.
#' This function is a wrapper for deconvolute_and_contextualize and pathway_enrich_internal.
#' 
#' @rdname scMappR_and_pathway_analysis
#' @name scMappR_and_pathway_analysis
#' 
#' @param count_file Normalized RNA-seq count matrix where rows are gene symbols and columns are individuals. Either the object tself of the path of a .tsv file.
#' @param signature_matrix Signature matrix (recommended odds ratios) of cell-type specificity of genes. Either the object itself or a pathway to a .RData file containing an object named "wilcoxon_rank_mat_or" -- generally internal.
#' @param DEG_list An object with the first column as gene symbols within the bulk dataset (doesn't have to be in signature matrix), second column is the adjusted p-value, and the third the log2FC path to a .tsv file containing this info is also acceptable.
#' @param case_grep Tag in the column name for cases (i.e. samples representing upregulated) OR an index of cases.
#' @param control_grep Tag in the column name for controls (i.e. samples representing downregulated OR an index of controls).
#' @param max_proportion_change Maximum cell-type proportion change -- may be useful if there are many rare cell-types.
#' @param print_plots Whether boxplots of the estimated CT proportion for the leave-one-out method of CT deconvolution should be printed. The same name of the plots will be completed for top pathways.
#' @param plot_names The prefix of plot pdf files.
#' @param output_directory The name of the directory that will contain output of the analysis.
#' @param theSpecies -9 if using a pre-computed count matrix from scMappR, human, mouse, or a specied directly compatible with gProfileR. Removes Ensembl symbols if appended.
#' @param sig_matrix_size Number of genes in signature matrix for cell-type deconvolution.
#' @param drop_unknown_celltype Whether or not to remove "unknown" cell-types from the signature matrix.
#' @param internet Whether you have stable Wifi (T/F).
#' @param up_and_downregulated Whether you are additionally splitting up/downregulated genes (T/F).
#' @param gene_label_size The size of the gene label on the plot.
#' @param number_genes The number of genes to cut-off for pathway analysis (good with many DEGs).
#' @param toSave Allow scMappR to write files in the current directory (T/F).
#' @param rda_path If downloaded, path to where data from scMappR_data is stored. 
#' @param newGprofiler Whether to use gProfileR or gprofiler2 (T/F).
#' @param path If toSave == TRUE, path to the directory where files will be saved.
#' 
#' @return List with the following elements:
#' \item{cellWeighted_Foldchanges}{Cellweighted Fold-changes for all differentially expressed genes.}
#' \item{paths}{Enriched biological pathways for each cell-type.}
#' \item{TFs}{Enirched TFs for each cell-type.}
#' 
#' @importFrom ggplot2 ggplot aes geom_boxplot geom_text theme coord_flip labs element_text
#' @importFrom pheatmap pheatmap
#' @importFrom graphics barplot plot
#' @importFrom Seurat AverageExpression CreateSeuratObject PercentageFeatureSet SCTransform SelectIntegrationFeatures PrepSCTIntegration FindIntegrationAnchors IntegrateData DefaultAssay RunPCA RunUMAP FindNeighbors FindClusters ScaleData FindMarkers
#' @importFrom GSVA gsva
#' @importFrom stats fisher.test median p.adjust reorder t.test sd var complete.cases
#' @importFrom utils combn read.table write.table head tail
#' @importFrom downloader download
#' @importFrom grDevices pdf dev.off colorRampPalette
#' @importFrom gprofiler2 gost
#' @importFrom gProfileR gprofiler
#' @importFrom pcaMethods prep pca R2cum
#' @importFrom limSolve lsei
#'
#' @examples 
#' 
#' data(PBMC_example)
#' bulk_DE_cors <- PBMC_example$bulk_DE_cors
#' bulk_normalized <- PBMC_example$bulk_normalized
#' odds_ratio_in <- PBMC_example$odds_ratio_in
#' case_grep <- "_female"
#' control_grep <- "_male"
#' max_proportion_change <- 10
#' print_plots <- FALSE
#' theSpecies <- "human"
#' toOut <- scMappR_and_pathway_analysis(bulk_normalized, odds_ratio_in, 
#'                                       bulk_DE_cors, case_grep = case_grep,
#'                                       control_grep = control_grep, rda_path = "", 
#'                                       max_proportion_change = 10, print_plots = TRUE, 
#'                                        plot_names = "tst1", theSpecies = "human", 
#'                                        output_directory = "tester",
#'                                        sig_matrix_size = 3000, up_and_downregulated = FALSE, 
#'                                        internet = FALSE)
#' 
#' 
#' @export
#' 
scMappR_and_pathway_analysis <- function(  count_file,signature_matrix, DEG_list, case_grep, control_grep, rda_path = "", max_proportion_change = -9, print_plots=T, plot_names="scMappR",theSpecies = "human", output_directory = "scMappR_analysis",sig_matrix_size = 3000, drop_unknown_celltype = TRUE, internet = TRUE, up_and_downregulated = FALSE, gene_label_size = 0.4, number_genes = -9, toSave=FALSE, newGprofiler = FALSE, path = NULL) {
  
  
  
  # This function generates cellWeighted_Foldchanges for every cell-type (see deconvolute_and_contextualize) as well as the relative cell-type proportions (which will be reutrned and pushed through)
  # Then, it generates heatmaps of all cellWeighted_Foldchanges, cellWeighted_Foldchanges overlapping with the signature matrix, the signature matrix, the signature matrix overlapping with cellWeighted_Foldchanges
  # Then, if you have WIFI, it will complete g:ProfilR of the reordered cellWeighted_Foldchanges as well as a the ordered list of genes.
  # This function is a wrapper for deconvolute_and_contextualize, as well as pathway_enrich_internal  
  
  # Args:
  # count_file: Normalized RNA-seq count matrix where rows are gene symbols and columns are individuals
  # either the object tself of the path of a TSV file
  # signature_matrix: signature matrix (reccommended odds ratios) of cell-type specificity of genes
  # either the object itself or a pathway to an RData file containing an object named "wilcoxon_rank_mat_or" -- generally internal
  # DEG_list
  # an object with the first column as gene symbols within the bulk dataset (doesn't have to be in signature matrix), second column is the adjusted P-value, and the third the log2FC
  # path to a tsv file containing this info is also acceptable
  #case_grep
  # tag in the column name for cases (i.e. samples representing upregulated) OR an index of cases
  #control_grep
  # tag in the column name for cases (i.e. samples representing upregulated) OR an index of cases
  #max_proportion_change
  # maximum cell-type proportion change -- may be useful if there are many rare cell-types
  # print_plots 
  # whether boxplots of the estimated CT proportion for the leave-one-out method of CT deconvolution should be printed
  # The same name of the plots will be completed for top pathways
  #plot_names
  # if plots are being printed, the prefix of their pdf files
  # output_directory
  # the name of the directory that will contain off of the analysis
  #theSpecies
  # -9 if using a precomputed count matrix from scMappR, human otherwise.
  # removes ensembl symbols
  #sig_matrix_size 
  # number of genes in signature matrix for cell-type deconvolution
  # drop_unknown_celltype
  # whether or not to remove "unknown" cell-types from the signature matrix
  # WIFI: Whether you have stable WIFI -- T/F
  # up_and_downregulated: whether you are splitting up/downregulated genes -- T/F
  # gene_label_size = the size of the gene label on the plot
  
  # Returns: 
  # a directory with:
  # cellWeighted_Foldchanges in RData file
  # Cell Type proportions (RData file)
  # cell-type proportions leave one out (RData file)
  # heatmap of cellWeighted_Foldchanges (all)
  # heatmap of cellWeighted_Foldchanges (within signature)
  # heatmap of signature (all)
  # heatmap of signature (overlapping with DEG_list)
  # Pathway enrichment for DEG list(all)
  # RData file and Biological Processes
  # Pathway enrichment of cellWeighted_Foldchanges for each cell-type
  # RData file and biological processes
  
  # load in the count matrix
  
  count_class <- class(count_file)[1] %in% c("character", "data.frame", "matrix")
  
  if(count_class[1] == FALSE) {
    stop("count_file must be of class character, data.frame, or matrix.")
  }
  signature_class <- class(signature_matrix)[1] %in% c("character", "data.frame", "matrix")
  if(signature_class[1] == FALSE) {
    stop("count_file must be of class character, data.frame, or matrix.")
  }
  DEG_list_class <- class(DEG_list)[1] %in% c("character", "data.frame", "matrix") 
  
  if(DEG_list_class[1] == FALSE) {
    stop("DEG_list must be of class character, data.frame, or matrix.")
  }
  case_grep_class <- class(case_grep)[1] %in% c("character", "numeric", "integer")
  case_grep_class[1] == FALSE
  if(case_grep_class[1] == FALSE) {
    stop("case_grep must be of class character (as a single character designating cases in column names) or of class numeric (integer matrix giving indeces of cases).")
  }
  control_grep_class <- class(control_grep)[1] %in% c("character", "numeric", "integer")
  
  if(control_grep_class[1] == FALSE) {
    stop("control_grep must be of class character (as a single character designating controls in column names) or of class numeric (integer matrix giving indeces of controls).")
  }
  
  if(!(theSpecies %in% c("human", "mouse"))) {
    if(theSpecies != -9) {
      stop("species_name is not 'human' 'mouse' or '-9' (case sensitive), please try again with this filled.")
    }
  }

  if(!is.character(rda_path)) {
    stop("rda_path must be of class list.")
  }
  
  if(!is.numeric(max_proportion_change)) {
    stop("max_proportion_change must be of class numeric.")
  }
  
  if(!is.character(plot_names) ) {
    stop("plot_names must be of class character.")
  }
  
  if(!is.character(output_directory)) {
    stop("output_directory must be of class character.")
  }
  if(!is.numeric(sig_matrix_size) ) {
    stop("sig_matrix_size is not numeric.")
  }
  
  if(!is.numeric(gene_label_size)) {
    stop("gene_label_size must be of class numeric.")
  }
  
  if(!is.numeric(number_genes)) {
    stop("number_genes must be of class numeric.")
  }
  if(all(is.logical(print_plots),is.logical(drop_unknown_celltype),is.logical(internet),is.logical(up_and_downregulated),is.logical(toSave),is.logical(newGprofiler) ) == FALSE) {
   stop("print_plots, drop_unknown_celltype, internet, up_and_down_regulatedtoSave, newGprofiler: must all be class logical.") 
  }

  if(toSave == TRUE) {
      if(is.null(path)) {
        stop("scMappR is given write permission by setting toSave = TRUE but no directory has been selected (path = NULL). Pick a directory or set path to './' for current working directory")
      }
      if(!dir.exists(path)) {
        stop("The selected directory does not seem to exist, please check set path.")
    }
  }

  theSpecies <- tolower(theSpecies)
  
  if(is.character(count_file)) {
    norm_counts_i <- utils::read.table(count_file, header = TRUE, as.is = TRUE, sep = "\t")
    warning("reading in a count file where the first column is expected to be the row names.")    
    rownames(norm_counts_i) <- norm_counts_i[,1]
    norm_counts_i <- norm_counts_i[,-1]
  } else {
    norm_counts_i <- count_file
  }
  # get the background for later pathway analysis
  background_genes <-  rownames(norm_counts_i)  
  # list of differential expression
  if(is.character(DEG_list)) {
    DEGs <- utils::read.table(DEG_list, header = FALSE, as.is = TRUE, sep = "\t")
  } else {
    DEGs <- as.data.frame(DEG_list)
  }
  
  colnames(DEGs) <- c("gene_name", "padj", "log2fc")
  DEGs$gene_name <- tochr(DEGs$gene_name)
  DEGs$padj <- toNum(DEGs$padj)
  DEGs$log2fc <- toNum(DEGs$log2fc)
  rownames(DEGs) <- DEGs$gene_name
  if(number_genes == -9) {
    number_genes <- as.numeric(nrow(DEGs))
  }
  if((!(is.character(case_grep)) | length(case_grep) > 1)[1]) {
    message("Assuming that case_grep are indeces of 'control'.")
    message("Appending 'scMappR_control to controls.")
    colnames(count_file)[case_grep] <- paste0("scMappR_case_", colnames(count_file)[case_grep])
    case_grep <- "scMappR_case"
  }
  if((!(is.character(control_grep)) | length(control_grep) > 1)[1]) {
    message("Assuming that control_grep indeces of  'control'.")
    message("Appending 'scMappR_control to controls.")
    colnames(count_file)[control_grep] <- paste0("scMappR_control_", colnames(count_file)[control_grep])
    control_grep <- "scMappR_control"
  }
  
  cases <- grep(case_grep, colnames(count_file))
  control <- grep(control_grep, colnames(count_file))
  inter_case_control <- intersect(cases, control)
  if(length(inter_case_control) != 0) {
    stop("Samples in 'case' and 'control' overlap. Please check column names.")
  }
  if(any(length(cases) < 2, length(control) < 2)[1]) {
    stop("There is fewer than two cases or controls, please check 'case_grep' or 'control_grep'.")
  }
  
  if(is.character(signature_matrix)) {
    # assuming that the signature matrix is an RData file containg an object named wilcoxon_rank_mat_or (internal, generally)
    wilcoxon_rank_mat_or <- ""
    load(signature_matrix)
    signature_matrix <- wilcoxon_rank_mat_or
  }
  if(length(grep("-", rownames(signature_matrix))) / length(rownames(signature_matrix)) > 0.75 ) {  
    warning("More than 50 genes contian '-', and the signature matrix is considered internal")
    load(paste0(rda_path,"/bioMart_ortholog_human_mouse.rda"))
    # data(bioMart_ortholog_human_mouse)    
    RN_2 <- get_gene_symbol(signature_matrix)
    
    rownames(signature_matrix) <- RN_2$rowname
    #stop("testing")
    internal_species <- RN_2$species
    if(internal_species != theSpecies) {
      warning(paste0("the species from the signature matrix, ", internal_species, ", does not equal the initially inputted species, ", theSpecies, ". Converting gene symbols of 1-1 orthologs"))
      RN <- rownames(signature_matrix) # gene symbols

      thefiles <- list.files(path = rda_path, "bioMart_ortholog_human_mouse.rda")
      
      
      if(length(thefiles) == 0) {
        warning(paste0("Cell-marker databases are not present in ", rda_path, " downloading and loading data."))
        #
        datafile <- "bioMart_ortholog_human_mouse.rda"
        metafile <- paste0(datafile)
        url <- paste0("https://github.com/DustinSokolowski/scMappR_Data/blob/master/", 
                      metafile, "?raw=true")
        destfile <- file.path(tempdir(), metafile)
        downloader::download(url, destfile = destfile, mode = "wb")
        load(destfile)
        #
      } else {
        load(paste0(rda_path,"/bioMart_ortholog_human_mouse.rda"))
      }      
      
      # data(bioMart_ortholog_human_mouse) 
      
      rownames(bioMart_orthologs) <- bioMart_orthologs[,internal_species]
      intersected_genes <- intersect(RN, rownames(bioMart_orthologs)) # gene sybmols in signature
      signature_matrix1 <- signature_matrix[intersected_genes,]
      bioMart_orthologs1 <- bioMart_orthologs[intersected_genes,]
      rownames(signature_matrix1) <- bioMart_orthologs1[,theSpecies] # replacing rownames with the species you want
      message(dim(signature_matrix1))
      signature_matrix <- signature_matrix1
    }
  }
  
  # Identify cell weighted Fold-changes 
  if(toSave == FALSE) {
    print_plots <- FALSE
  }  
  message("Keeping genes in signature matrix that overlap with count matrix.")
  signature_matrix <- signature_matrix[rownames(signature_matrix) %in% rownames(count_file),]
  signature_vars <- apply(signature_matrix, 2, stats::var)
  signature_rowvars <- apply(signature_matrix, 1, stats::var)
  signature_matrix <- signature_matrix[signature_rowvars > 0,]
  signature_matrix <- signature_matrix[,signature_vars > 0]
  message("cell-types with markers that overlap with inputted count matrix")
  message(colnames(signature_matrix))
  
  if((length(unique(colnames(signature_matrix))) < length(colnames(signature_matrix)))[1]) {
    warning("cell-type naming is not unique, appending unique identifier (1:ncol(signature))")
    colnames(signature_matrix) <- paste0(colnames(signature_matrix), "_", 1:ncol(signature_matrix))
  }
  
  cellWeighted_Foldchanges <- deconvolute_and_contextualize(count_file, signature_matrix, DEG_list, case_grep , control_grep, max_proportion_change = max_proportion_change, print_plots = print_plots, plot_names = plot_names, theSpecies = theSpecies, sig_matrix_size = sig_matrix_size, drop_unknown_celltype = drop_unknown_celltype, toSave = toSave, path = path)

  # Computing t-test for changes in cell-type proportion
  ttest_decon <- function(x) {
    # This function takes the cell-type proporitons of cases and controls and completes t-tests to see if there are significant changes.  
    Cases <- cellWeighted_Foldchanges$cellType_Proportions[grep(case_grep, rownames(cellWeighted_Foldchanges$cellType_Proportions)),x] # proportion of cell-typeof cases
    Control <- cellWeighted_Foldchanges$cellType_Proportions[grep(control_grep, rownames(cellWeighted_Foldchanges$cellType_Proportions)),x] #controls
    case_control <- stats::t.test(Cases,Control)# t test
    case_control$p.value # p val
    case_control$statistic # t stat
    case_mean <- mean(Cases) # mean cases
    control_mean <- mean(Control) # mean control
    theName <- colnames(cellWeighted_Foldchanges$cellType_Proportions)[x] # cell-type
    toReturn <- c(unname(case_control$p.value), unname(case_control$statistic),case_mean, control_mean )
    return(toReturn)
  } 
  stacked <- lapply(1:ncol(cellWeighted_Foldchanges$cellType_Proportions), ttest_decon)
  proportion_T <- do.call("rbind",stacked) # t test of proportions of each cell-type and staack
  rownames(proportion_T) <- colnames(cellWeighted_Foldchanges$cellType_Proportions)
  colnames(proportion_T) <- c("P.Value", "T.Statistic", "CaseMean", "ControlMean")
  
  cellWeighted_Foldchanges$ProportionT.test <- proportion_T
  
  message(paste0("Making scMappR output directory named", output_directory, "."))
  if(toSave == FALSE) {
    warning("toSave == FALSE therefore files cannot be saved. Switching toSave = TRUE is strongly reccomended. Returning cellWeighted_Foldchanges and no pathway analysis.")
    message("toSave == FALSE therefore files cannot be saved. Switching toSave = TRUE is strongly reccomended. Returning cellWeighted_Foldchanges and no pathway analysis.")
    return(cellWeighted_Foldchanges)    
  }
  dir.create(paste0(path,"/",output_directory))
  
  scMappR_vals <- cellWeighted_Foldchanges$cellWeighted_Foldchange # scMappR values
  T_test_outs <- cellWeighted_Foldchanges$ProportionT.test
  message("Writing summary of cell-type proportion changes between case and control.")
  utils::write.table(T_test_outs, file = paste0(path,"/",output_directory, "/", plot_names, "_cell_proportion_changes_summary.tsv"), quote = FALSE, row.names = TRUE, col.names = TRUE, sep = "\t")
  
  #message(scMappR_vals)
  save(scMappR_vals, file = paste0(path,"/",output_directory, "/",plot_names, "_cellWeighted_Foldchanges.RData"))
  
  cell_proportions_all <- cellWeighted_Foldchanges$cellType_Proportions # all gene CT proportion
  save(cell_proportions_all, file = paste0(path,"/",output_directory, "/",plot_names, "_celltype_proportions.RData"))
  
  leave_one_out_proportions <- cellWeighted_Foldchanges$leave_one_out_proportions # leave one out avg CT proportions
  save(cell_proportions_all, file = paste0(path,"/",output_directory, "/",plot_names, "_leaveOneOut_gene_proportions.RData"))
  
  signature_mat <- cellWeighted_Foldchanges$processed_signature_matrix # processed_signaure_matrix
  sigmat_row <- apply(signature_mat, 1, stats::var)
  sigmat_col <- apply(signature_mat, 2, stats::var)
  signature_mat <- signature_mat[which(sigmat_row > 0), which(sigmat_col > 0)]
  save(signature_mat, file = paste0(path,"/",output_directory, "/",plot_names, "_leaveOneOut_gene_proportions.RData"))
  if(nrow(DEG_list) == 1) {
    warning("You only have 1 DEG, no heatmaps can be made. Returning cellWeighted_Foldchange")
    message("You only have 1 DEG, no heatmaps can be made. Returning cellWeighted_Foldchange")
    return(scMappR_vals)
    }
  myheatcol <- grDevices::colorRampPalette(c("lightblue", "white", "orange"))(256)
  
  # generate heatmaps for DEGs
  cex = gene_label_size
  
  scMappR_vals_vars <- apply(scMappR_vals,1,stats::var)
  scMappR_vals <- scMappR_vals[scMappR_vals_vars > 0,]
  
  inter <- intersect(rownames(scMappR_vals),rownames(signature_mat))
  
  #Absolute value plotting of scMappR vals
  if(length(inter) > 3) {
  scMappR_intersect <- scMappR_vals[inter,]
  scMappR_pref <- scMappR_vals[inter,]
  
  
  grDevices::pdf(paste0(path,"/",output_directory,"/",plot_names,"_cwFC_absoluteVals_signature_overlap.pdf")) 
  pl_abs <- pheatmap::pheatmap(abs(as.matrix(scMappR_intersect)), color = myheatcol, scale = "row", fontsize_row = cex, fontsize_col = 10)
  dev.off()
  
  grDevices::pdf(paste0(path,"/",output_directory,"/",plot_names,"cwFC_absoluteVals_all.pdf"))
  pheatmap::pheatmap(abs(as.matrix(scMappR_vals[,pl_abs$tree_col$order])), color = myheatcol, scale = "row", fontsize_row = cex, fontsize_col = 10, cluster_cols = FALSE)
  dev.off()
  
  grDevices::pdf(paste0(path,"/",output_directory,"/",plot_names,"_DEG_preferences_heatmap.pdf")) 
  pheatmap::pheatmap(as.matrix(scMappR_pref[pl_abs$tree_row$order,pl_abs$tree_col$order]), color = myheatcol, scale = "row", fontsize_row = cex, fontsize_col = 10, cluster_rows = FALSE, cluster_cols = FALSE)
  dev.off()
  

  }
  
  scMappR_vals_up <- as.matrix(scMappR_vals[apply(scMappR_vals,1, sum) > 0,])
  scMappR_vals_down <- as.matrix(scMappR_vals[apply(scMappR_vals,1, sum) < 0,])
  grDevices::pdf(paste0(path,"/",output_directory,"/",plot_names,"_cell_proportions_heatmap.pdf")) 
  #gplots::heatmap.2(as.matrix(cellWeighted_Foldchanges$cellType_Proportions), Rowv = TRUE, dendrogram = "column", col = myheatcol, scale = "row", trace = "none", margins = c(7,7),cexRow = cex, cexCol = 0.3 )
  pheatmap::pheatmap(as.matrix(cellWeighted_Foldchanges$cellType_Proportions), color = myheatcol, scale = "row", fontsize_row = cex, fontsize_col = 10)
  grDevices::dev.off()
  if((nrow(scMappR_vals_up) > 2 & ncol(scMappR_vals_up) > 2)[1]) {
    grDevices::pdf(paste0(path,"/",output_directory, "/", plot_names,"_cellWeighted_Foldchanges_upregulated_DEGs_heatmap.pdf"))
    
    pheatmap::pheatmap(as.matrix(abs(scMappR_vals_up)), color = myheatcol, scale = "row", fontsize_row = cex, fontsize_col = 10)
    #gplots::heatmap.2(as.matrix(abs(scMappR_vals_up)), Rowv = TRUE, dendrogram = "column", col = myheatcol, scale = "row", trace = "none", margins = c(7,7),cexRow = cex, cexCol = 0.3 )
    grDevices::dev.off()
  } else {
    warning("There were fewer than two upregulated DEGs, therefore a heatmap could not be made.")
    message("There were fewer than two upregulated DEGs, therefore a heatmap could not be made.")
    
  }
  message(dim(scMappR_vals_down))
  if((nrow(scMappR_vals_down) > 2 & ncol(scMappR_vals_down) > 2)[1]) {
  grDevices::pdf(paste0(path,"/",output_directory, "/", plot_names,"_cellWeighted_Foldchanges_downregulated_DEGs_heatmap.pdf"))
  #gplots::heatmap.2(as.matrix(abs(scMappR_vals_down)), Rowv = TRUE, dendrogram = "column", col = myheatcol, scale = "row", trace = "none", margins = c(7,7),cexRow = cex, cexCol = 0.3 )
  pheatmap::pheatmap(as.matrix(abs(scMappR_vals_down)), color = myheatcol, scale = "row", fontsize_row = cex, fontsize_col = 10)
  
  grDevices::dev.off()
  } else {
    warning("There were fewer than two downregulated DEGs, therefore a heatmap could not be made.")
    message("There were fewer than two downregulated DEGs, therefore a heatmap could not be made.")
    
  }
  
  grDevices::pdf(paste0(path,"/",output_directory, "/",plot_names,"_all_CT_markers_in_background.pdf"))
  #gplots::heatmap.2(as.matrix(signature_mat), Rowv = TRUE, dendrogram = "column", col = myheatcol, scale = "row", trace = "none", margins = c(7,7),cexRow = cex, cexCol = 0.3 )
  pheatmap::pheatmap(as.matrix(signature_mat), color = myheatcol, scale = "row", fontsize_row = cex, fontsize_col = 10)
  
  grDevices::dev.off()
  
  celltype_preferred_degs <- intersect(rownames(scMappR_vals), rownames(signature_matrix)) # intersect DEGs and Genes in signature matrix
  
  if(length(celltype_preferred_degs) < 3) {
    warning("Fewer than 3 genes are both De and in the signature matrix. Therefore, these heatmaps will not be generated. Furthermore, there is insufficient re-ranking of genes for different pathway analyses to be neccesary. Therefore, here, cellWeighted_Foldchanges are more representative of a scaling factor for each cell-type.")
    message("Fewer than 3 genes are both De and in the signature matrix. Therefore, these heatmaps will not be generated. Furthermore, there is insufficient re-ranking of genes for different pathway analyses to be neccesary. Therefore, here, cellWeighted_Foldchanges are more representative of a scaling factor for each cell-type.")
    
    return(list(cellWeighted_Foldchanges = cellWeighted_Foldchanges))

  } else {
    
    # generating the heatmaps for cellWeighted_Foldchanges and signature matrix odds ratios that overlap with one another
    up_signature <- intersect(rownames(scMappR_vals_up), celltype_preferred_degs)
    down_signature <- intersect(rownames(scMappR_vals_down), celltype_preferred_degs)
    

    
    
    
    signature_mat_up <- as.matrix(signature_mat[up_signature,])
    signature_mat_down <- as.matrix(signature_mat[down_signature,])
    scMappR_vals_up1 <- as.matrix(scMappR_vals_up[up_signature,])
    scMappR_vals_down1 <- as.matrix(scMappR_vals_down[down_signature,])

    # Upregulated DEG Heatmap
    if(nrow(signature_mat_up) > 2 & ncol(signature_mat_up) > 2) {
    grDevices::pdf(paste0(path,"/",output_directory, "/",plot_names,"_celltype_specific_upregulated_cwFoldChange_heatmap.pdf"))
    #pl <- gplots::heatmap.2(as.matrix(signature_mat_up), Rowv = TRUE, dendrogram = "column", col = myheatcol, scale = "row", trace = "none", margins = c(7,7),cexRow = cex, cexCol = 0.3 )
    pl <- pheatmap::pheatmap(as.matrix(scMappR_vals_up1), color = myheatcol, scale = "row", fontsize_row = cex, fontsize_col = 10)
    
    #message(pl)
    grDevices::dev.off()
    
    
    grDevices::pdf(paste0(path,"/",output_directory, "/", plot_names,"celltype_specific_preferences_upregulated_DEGs_heatmap.pdf"))
    #gplots::heatmap.2(as.matrix(scMappR_vals_up1[rev(colnames(pl$carpet)),pl$colInd]),Colv=F, Rowv = FALSE, dendrogram = "column", col = myheatcol, scale = "row", trace = "none", margins = c(7,7),cexRow = cex, cexCol = 0.3 )
    tst <- pheatmap::pheatmap(as.matrix(signature_mat_up)[pl$tree_row$order,pl$tree_col$order], color = myheatcol, scale = "row", fontsize_row = cex, fontsize_col = 10, cluster_cols = FALSE, cluster_rows = FALSE)
    
    grDevices::dev.off()
    } else {
      warning("There were fewer than two cell-type specific, upregulated DEGs, therefore a heatmap could not be made.")
      message("There were fewer than two cell-type specific, upregulated DEGs, therefore a heatmap could not be made.")
      
    }
    
    # Downregulated DEG Heatmap
    
    if((nrow(signature_mat_down) > 2 & ncol(signature_mat_down) > 2)[1]) {
    grDevices::pdf(paste0(path,"/",output_directory, "/",plot_names,"_celltype_specific_cellWeighted_Foldchanges_downregulated_heatmap.pdf"))
    pl2 <- pheatmap::pheatmap(as.matrix(scMappR_vals_down1), color = myheatcol, scale = "row", fontsize_row = cex, fontsize_col = 10)
    grDevices::dev.off()
    
    
    grDevices::pdf(paste0(path,"/",output_directory, "/", plot_names,"_celltype_specific_preferences_downregulated_DEGs_heatmap.pdf"))
    #gplots::heatmap.2(as.matrix(abs(scMappR_vals_down1[rev(colnames(pl2$carpet)),pl2$colInd])),Colv=F, Rowv = FALSE, dendrogram = "column", col = myheatcol, scale = "row", trace = "none", margins = c(7,7),cexRow = cex, cexCol = 0.3 )
    tst <- pheatmap::pheatmap(as.matrix(signature_mat_down)[pl2$tree_row$order,pl2$tree_col$order], color = myheatcol, scale = "row", fontsize_row = cex, fontsize_col = 10, cluster_cols = FALSE, cluster_rows = FALSE)
    
    grDevices::dev.off()
    }  else {
      warning("There were fewer than two cell-type specific, downregulated DEGs, therefore a heatmap could not be made.")
      message("There were fewer than two cell-type specific, downregulated DEGs, therefore a heatmap could not be made.")
      
    }
  }
  if(internet == FALSE) {
    warning("There is not a reported stable internet (WIFI = FALSE) and therefore pathway analysis with g:Prof")
    return("Done!")
  }

  up_and_down_together <- pathway_enrich_internal(  DEGs, theSpecies, scMappR_vals, background_genes, output_directory, plot_names, number_genes = number_genes, toSave=TRUE,path=path, newGprofiler = newGprofiler)
  if(up_and_downregulated == TRUE)  {
    message("Splitting genes by up- and down-regulated and then repeating analysis")
    rownames(DEGs) <- DEGs$gene_name
    upGenes <- DEGs$gene_name[DEGs$log2fc > 0]
    downGenes <- DEGs$gene_name[DEGs$log2fc < 0]


    
    upDEGs <- DEGs[upGenes,]
    downDEGs <- DEGs[downGenes,]
    
    if(length(upGenes) > 2) {
    upDir <- paste0(output_directory,"/upregulated") 
    dir.create(paste0(path,"/",upDir))
    message("Pathway analysis of upregulated genes")
    upcellWeighted_Foldchanges <- scMappR_vals[upGenes,]
    up_only <- pathway_enrich_internal(  upDEGs, theSpecies, upcellWeighted_Foldchanges, background_genes, upDir, plot_names, number_genes = number_genes, toSave=TRUE, path = path, newGprofiler = newGprofiler)
    } else {
      warning("There are fewer than three upregulated DEGs.")
    }
    message("Pathway analysis of downregulated genes")
    if(length(downGenes) > 2) {
    downDir <- paste0(output_directory, "/downregulated")
      
    dir.create(paste0(path,"/",downDir))
    DowncellWeighted_Foldchanges <- scMappR_vals[downGenes,]
    down_only <- pathway_enrich_internal(  downDEGs, theSpecies, DowncellWeighted_Foldchanges, background_genes, downDir, plot_names, number_genes = number_genes, toSave=TRUE, path = path, newGprofiler = newGprofiler)    
    } else {
      warning("There are fewer than three downregulated DEGs.")
      
    }
  }
  
  return(list(cellWeighted_Foldchanges = cellWeighted_Foldchanges, paths = up_and_down_together$biological_pathways, TFs = up_and_down_together$transcription_factors))
}
DustinSokolowski/scMappR documentation built on July 7, 2020, 5:44 p.m.