R/compare_conditions.R

Defines functions diffConditionPlots diffConditionMrkrs

diffConditionMrkrs <- function(
  seurat_obj, group_clusters = NULL, cell_specific = FALSE,
  n_cores = 1, save_raw = TRUE, pval_cutoff = 0.05, save_collapsed = TRUE,
  cell_group_name = NULL, file_prefix = gsub(":|\ ", "-", Sys.time()), complete_gene_tbl =FALSE) {
  
  if (DefaultAssay(seurat_obj) != "RNA") {
    print("Changing default assay to RNA")
    DefaultAssay(seurat_obj) <- "RNA"
  }

  if (cell_specific) {
    folder <- paste0("cell-specific-diff-", gsub(":|\ ", "-", Sys.time()),"/")
  } else {
    folder <- paste0("cell-type-diff-", gsub(":|\ ", "-", Sys.time()),"/")
  }

  dir.create(figurePath(folder), showWarnings = FALSE)

  all_BCs <- Idents(seurat_obj)
  cell_type <- as.character(unique(Idents(seurat_obj)))
  meta <- seurat_obj@meta.data

  trt <- unique(meta$data.set)
  trt_cnt <- seq(1, (length(trt) * 2) - 1, by = 2)
  trt_BCs <- rep(list(character()), length(trt) * 2)

  if (is.null(group_clusters)) {
    n_clust <- seq_along(unique(Idents(seurat_obj)))
    iterations <- n_clust
  } else {
    n_clust <- which(unique(Idents(seurat_obj)) %in% group_clusters)
    iterations <- 1
  }

  marker_list <- list()[n_clust]
  table_list <- parallel::mclapply(iterations, mc.cores = n_cores,
    FUN = function(h) {
      print(paste0("Number of apply iterations = ",
        iterations, " over ", n_cores, " cores"))
      print(cell_type[n_clust])

      if (is.null(group_clusters)) {
        cell_group <- cell_type[h]
      } else {
        cell_group <- cell_type[n_clust]
      }

      if (cell_specific){
        for(i in seq_along(trt_cnt)){
          trt_BCs[[trt_cnt[i]]] <- rownames(meta[
            meta$data.set == trt[i] & meta$cell.type.ident %in% cell_group,])
          trt_BCs[[trt_cnt[i] + 1]] <- rownames(meta[
            !(meta$data.set == trt[i] & meta$cell.type.ident %in% cell_group),])
        }
      } else {
        for(i in seq_along(trt_cnt)) {
          trt_BCs[[trt_cnt[i]]] <- rownames(meta[
            meta$data.set == trt[i] & meta$cell.type.ident %in% cell_group,])
          trt_BCs[[trt_cnt[i] + 1]] <- rownames(meta[
            !meta$data.set == trt[i] & meta$cell.type.ident %in% cell_group,])
        }
      }

      diff_results <- list() # initalize list

      for (i in seq_along(trt_cnt)) {
        print("Running FindMarkers:")
        print(paste(trt[i], "vs. other timepoints"))

        condition1 <- trt_BCs[[trt_cnt[i]]]
        condition2 <- trt_BCs[[trt_cnt[i] + 1]]
        
        #ddiaz originalhead
        # diff_results[[i]] <- FindMarkers(seurat_obj, only.pos = FALSE,
        #   ident.1 = condition1, ident.2 = condition2, logfc.threshold = 0.25)
        # 
        if(complete_gene_tbl){
          diff_results[[i]] <- FindMarkers(seurat_obj, only.pos = FALSE,
           ident.1 = condition1, ident.2 = condition2, logfc.threshold = 0,
           min.cells.group = 0, 
           min.cells.feature = 0,
           min.pct = 0)
        }else{
          diff_results[[i]] <- FindMarkers(seurat_obj, only.pos = FALSE,
         ident.1 = condition1, ident.2 = condition2, logfc.threshold = 0.25)
        }
      }

      for (i in seq_along(trt_cnt)) {
        diff_results[[i]] <-
          diff_results[[i]][diff_results[[i]]$p_val < pval_cutoff,]
        
        diff_results[[i]]$pete_score <-
          diff_results[[i]]$pct.1 * diff_results[[i]]$avg_logFC * (
            diff_results[[i]]$pct.1 / diff_results[[i]]$pct.2)

        diff_results[[i]] <- diff_results[[i]][
          order(-1 * (diff_results[[i]]$pete_score)),]

        if (!is.null(cell_group_name)) {
          cell_group <- cell_group_name
        }

        diff_results[[i]]$cell.type.and.trt <-
          paste0(paste0(cell_group, "_", trt[i]), collapse = "-")
        
        diff_results[[i]]$Gene.name.uniq <- rownames(diff_results[[i]])
        diff_results[[i]] <- inner_join(diff_results[[i]],
          gene_info, by = "Gene.name.uniq")
      }
      return(diff_results)
    }
  ) # end mclapply

  if (cell_specific) {
    list_name <- paste0(file_prefix, "_marker_list_specific_")
    collapsed_name <- paste0(file_prefix, "_all_markers_specific_")
  } else {
    list_name <- paste0(file_prefix, "_marker_list_")
    collapsed_name <- paste0(file_prefix, "_all_markers_")
  }

  if (save_raw) {
    saveRDS(table_list, dataPath(paste0(list_name, script_name,"_.RDS")))
    table_list <- readRDS(dataPath(paste0(list_name, script_name,"_.RDS")))
  }

  # Collapse list of lists
  if (is.null(group_clusters)) {
    n_results <- n_clust
  } else {
    n_results <- 1
  }

  all_markers <- lapply(n_results, function (i) {bind_rows(table_list[[i]])})
  all_markers <- bind_rows(all_markers)

  if (save_collapsed) {
    saveRDS(all_markers,
      dataPath(paste0(collapsed_name, script_name,"_.RDS")))
    all_markers <- readRDS(
      dataPath(paste0(collapsed_name, script_name,"_.RDS")))
  }
  
  print("marker list (multiple data frames) path:")
  print(dataPath(paste0(list_name, script_name,"_.RDS")))
  cat("\n")
  print("collapsed marker list (single data frame) path:")
  print(dataPath(paste0(collapsed_name, script_name,"_.RDS")))
  
  return(all_markers)
}


# ==== Plot results from diffConditionClust
diffConditionPlots <- function(seurat_obj, input_table = NULL,
  folder_prefix = gsub(":|\ ", "-", Sys.time()), short_sig_figs = TRUE,
  n_genes = 200, n_cores = 4, split = FALSE, all_idents = FALSE) {

  if (DefaultAssay(seurat_obj) != "RNA") {
  stop("Default assay is not RNA")
  }

  if (is.null(input_table)) {
    stop(paste0("Input file with columns Gene.name.uniq ",
      "'cell.type.and.trt' and 'cell.type.ident' required"))
  }
  
  all_markers <- input_table
  print("Number or results for each combination of treatment and cell type:")
  print(table(all_markers$cell.type.and.trt))

  if (short_sig_figs) {
    seurat_obj@meta.data$cell.type.and.trt <- paste0(
      seurat_obj@meta.data$cell.type.ident, "_",
      seurat_obj@meta.data$data.set)

    # Shortening sig figs for plotting
    num_cols <- which(lapply(all_markers, function(i){class(i)}) == "numeric")
    all_markers[,num_cols] <- signif(all_markers[,num_cols], 3)

    DefaultAssay(seurat_obj) <- "RNA"
    assay <- "assay_RNA_"
  }

  seq_nums <- seq(1, n_genes, by = 20)
  meta <- seurat_obj@meta.data

  # get index change for cell type and treatment
  ind_chng <- match(unique(all_markers$cell.type.and.trt),
    all_markers$cell.type.and.trt)

  if (split){
    dir.create(figurePath(paste0(folder_prefix, "vln-plots")),
      showWarnings = FALSE)
    dir.create(figurePath(paste0(folder_prefix, "feat-plots")),
      showWarnings = FALSE)
  } else {
    dir.create(figurePath(paste0(folder_prefix, "feat-vln-plots")),
      showWarnings = FALSE)
  }

  parallel::mclapply(seq_along(ind_chng), mc.cores = n_cores, 
    function (i) {

      ifelse(seq_along(ind_chng[i]) == tail(seq_along(ind_chng),1),
        ind_range <- ind_chng[i]:((ind_chng[i + 1]) - 1),
        ind_range <- ind_chng[i]:nrow(all_markers))
      
      population <- paste0(all_markers$cell.type.and.trt[ind_range], " vs. ",
        strsplit(all_markers$cell.type.and.trt[ind_range],"-")[[1]][1],"-other")
      
      stats <- paste0("p-value: ", all_markers$p_val[ind_range], ", ",
        "avg. logFC: ", all_markers$avg_logFC[ind_range], ", ",
        "pct.1: ", all_markers$pct.1[ind_range], ", ",
        "pct.2: ", all_markers$pct.2[ind_range], ", ",
        "pete_score: ", all_markers$pete_score[ind_range])
      
      genes <- all_markers$Gene.name.uniq[ind_range]
      genes <- genes[1:n_genes]

      for(j in seq_along(seq_nums)) {
        if (all_idents) {
          cell_ident <- NULL
        } else {
          cell_ident <- gsub("_.*","",
            all_markers$cell.type.and.trt[ind_range][1])
        }

        sub_ind <- seq_nums[j]:(seq_nums[j]+19)
        pop_sub <- population[sub_ind]
        stats_sub <- stats[sub_ind]

        trt_with_index <- all_markers$cell.type.and.trt[ind_chng[i]]
        to_plot <- genes[sub_ind]
        if (NA %in% to_plot) {break}

        print(paste0(
          "generating plots ", seq_nums[j],"-", (seq_nums[j]+19), " ", ids[i]))
        vln_list <- VlnPlot(seurat_obj, to_plot, pt.size = 0.25,
          idents = cell_ident, cols = trt_colors, combine = FALSE,
          group.by = "data.set")
        
        for(k in seq_along(vln_list)) {
          vln_list[[k]] <- vln_list[[k]] + NoLegend() + labs(
            caption = paste(pop_sub[k], "\n", stats_sub[k])) +
          theme(plot.caption = element_text(hjust = 0))
        }

        feat_list <- FeaturePlot(seurat_obj, to_plot, reduction = "umap",
          pt.size = 0.25, combine = FALSE)
        
        for(k in seq_along(feat_list)) {
          feat_list[[k]] <- feat_list[[k]] + NoLegend() + NoAxes() + labs(
            caption = paste(pop_sub[k], "\n", stats_sub[k])) +
          theme(plot.caption = element_text(hjust = 0))
        }

        if(split) {
          vln_path <- figurePath(paste0(folder_prefix, "-vln-plots/",
            trt_with_index, "_top_", seq_nums[j],
            "-", (seq_nums[j] + 19),"_features.png"))

          feat_path <- figurePath(paste0(folder_prefix, "-feat-plots/",
            trt_with_index, "_top_", seq_nums[j],
            "-", (seq_nums[j] + 19),"_features.png"))

          png(vln_path, width = 30, height = 25, units = "in", res = 200)
          print(cowplot::plot_grid(plotlist = vln_list))
          dev.off()
          
          png(feat_path, width = 30, height = 25, units = "in", res = 200)
          print(cowplot::plot_grid(plotlist = feat_list))
          dev.off()
        } else {
          combined_path <- figurePath(paste0(folder_prefix, "feat-vln-plots/",
            trt_with_index, "_top_", seq_nums[j],
            "-", (seq_nums[j] + 19),"_features.png"))

          combined_list <- list()[1:40]
          seqs <- seq(1, 40, by = 5)

          for (x in seq(2, 8, by = 2)) {
            first_row <- (seqs[x-1]):(seqs[x-1]+4)
            next_row <- (seqs[x]):(seqs[x]+4)
            plot_slice <- ((((x/2)*5)-4):((x/2)*5))

            combined_list[first_row] <- feat_list[plot_slice]
            combined_list[next_row] <- vln_list[plot_slice]
          }

          png(combined_path,
            width = 50, height = 60, units = "in", res = 100)
          print(cowplot::plot_grid(plotlist = combined_list, ncol = 5))
          dev.off()
        }
      } # end print plot loop
    }
  ) # end mclappy vln
}


if (FALSE) {
  log_mat <- seurat_obj@assays$RNA@data

  geneMarkerDist <- function(i, group1) {
    if (group1 == TRUE) {bc_ind <- grepl(
      all_markers$cell.type.and.trt[i], meta$cell.type.and.trt)
      group <- "group-1"
    } else {bc_ind <- !grepl(
      all_markers$cell.type.and.trt[i], meta$cell.type.and.trt)
      group <- "group-2"}
    
    gene_vec <- log_mat[all_markers$Gene.name.uniq[i], bc_ind]
    
    stats <- summary(log_mat[all_markers$Gene.name.uniq[i], bc_ind])
    stats <- c(paste(all_markers$Gene.name.uniq[i], group),
      paste(names(stats), signif(unname(stats), 2), sep = ":"))
    return(stats)
  }
  
  # This takes awhile to run for the macrophage data, even with 10 cores
  stat_sum1 <- parallel::mclapply(1:nrow(all_markers),
    gene_marker_dist, group1 = TRUE, mc.cores = n_cores)

  stat_sum2 <- parallel::mclapply(1:nrow(all_markers),
    gene_marker_dist, group1 = FALSE, mc.cores = n_cores)

  save(stat_sum1, stat_sum2,
    file = dataPath(paste0("stat_sums", script_name, "_.RData")))
  load(file = dataPath(paste0("stat_sums", script_name, "_.RData")))
}
ntran95/SeuratExtensions documentation built on Feb. 14, 2022, 1:48 a.m.