R/ratplots.R

Defines functions Volcano_Plot_GS_notworks DEG_similarity_table DEG_similarity_plot DEGs_UpDown_Plot Cluster_sig_DEGs Cluster_Marker_Violins MCsumm Cluster_GSEA_pvals Volcano_Plot Volcano_Plot_GS GSEA_Mountain Cluster_GSEA BellagioGeneSet BellagioPlot cluster_DEGs rename_clusters top_markers save_ploty capitalize

#Need
#check all existing functions
#standardize names between functions


capitalize <- function(x) {
  s <- strsplit(x, " ")[[1]]
  paste(toupper(substring(s, 1,1)), substring(s, 2),
        sep="", collapse=" ")
}

save_ploty <- function(filename, plot, font = "Arial", font_size = 10, line_width = .5,
                       ncol = 1, nrow = 1, base_height = 3.71, base_asp = 1.618, base_width = NULL,
                       strip_text = F){
  plot <- plot + theme(text = element_text(family = font, size = font_size, color = 'black'),
                       axis.text = element_text(family = font, size = font_size, color = 'black'),
                       legend.text = element_text(family = font, size = font_size, color = 'black'),
                   axis.ticks = element_line(size = line_width, color = "black"),
                   axis.line = element_line(size = line_width, color = "black"))
  if(strip_text == T){
    plot <- plot + theme(axis.text.x = element_blank(), plot.title = element_blank(),
    axis.text.y = element_blank(), axis.title.x = element_blank(),
    axis.title.y = element_blank(), legend.position = "none")
  }
  save_plot(filename = filename, plot = plot, ncol = ncol, nrow = nrow, base_height = base_height,
            base_asp = base_asp, base_width = base_width)
}



#' return the top n markers from a list of markers as generated by FindAllMarkers()
#' by default gives the highest LFC after adjp filtering.
#'
#' @param marker.list optional: a list of markers for clusters. If not supplied,
#' will be calculated by FindAllMarkers() with default settings
#' @param n_markers number of markers per cluster to return
#' @param adj_p_cutoff adjp value for filtering
#' @return a tibble with top n markers from each cluster
#' @export
top_markers <- function(marker.list, n_markers = 1, adj_p_cutoff = .05){
  tryCatch({
    marker.list <- marker.list %>%
      filter(avg_logFC > 0) %>%
      mutate(sig = ifelse(p_val_adj < adj_p_cutoff, "", "(NS)")) %>%
      mutate(genesig = paste0(gene, sig))
    nclusters <- nlevels(marker.list$cluster)
    tops <- tibble()
    for(i in 1:nclusters){
      temp <- filter(marker.list, cluster == i-1) %>%
        arrange(sig, desc(avg_logFC))
      tops <- bind_rows(tops, head(temp, n_markers))
    }
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
  tops <- select(tops, -sig)
  return(tops)
}
#' rename numeric seurat object clusters with output of top_markers
#'
#' @param object a seurat object
#' @param marker.list optional: a list of markers for clusters. If not supplied,
#' will be calculated by FindAllMarkers() with default settings
#' @return a seurat object with clusters named for top markers
#' @export
rename_clusters <- function(object, marker.list = NULL){
  tryCatch({
    if(is.null(marker.list)){
      DefaultAssay(object) <- "RNA"
      marker.list <- FindAllMarkers(object = object, only.pos = T)
    }
    new.cluster.ids <- make.unique(top_markers(marker.list)$genesig)
    names(new.cluster.ids) <- levels(object)
    ret <- RenameIdents(object, new.cluster.ids)
    return(ret)
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")
    return(object)})

}

#' rename numeric seurat object clusters with output of top_markers
#'
#' @param object a seurat object
#' @param marker.list optional: a list of markers for clusters. If not supplied,
#' will be calculated by FindAllMarkers() with default settings
#' @return a seurat object with clusters named for top markers
#' @export
cluster_DEGs <- function(object, condition_1, condition_2, meta_slot = "orig.ident",
                         logfc.threshold = .1, min.pct = .05, test.use = "wilcox",
                         logFCcollapse = 100, slot = "data"){
  tryCatch({
    DefaultAssay(object) <- "RNA"
    levs <- levels(object@active.ident)
    Idents(object) <- paste0(object@active.ident, "_", object@meta.data[[meta_slot]])
    temp <- list()
    for(i in levs){
      tryCatch({
        x <- paste0(i, "_", condition_1)
        y <- paste0(i, "_", condition_2)
        temp[[i]] <- FindMarkers(object, ident.1 = x, ident.2 = y, logfc.threshold = logfc.threshold, min.pct = min.pct, test.use = test.use, slot = slot)
        temp[[i]] <- rownames_to_column(temp[[i]], var = "gene")
        temp[[i]]$avg_logFC[temp[[i]]$avg_logFC < -logFCcollapse] <- -logFCcollapse
        temp[[i]]$avg_logFC[temp[[i]]$avg_logFC > logFCcollapse] <- logFCcollapse
        temp[[i]]$cluster <- i
      }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
    }
    return(temp)
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
#missing cluster column

BellagioPlot <- function(Cluster_DEG_list, logFCcollapse = 10,
                         padj.cutoff = .05, logFC.cutoff = .1,
                         pt.size = 1, pt.alpha = 1, xlab.italic = T){
  tryCatch({
    collapse <- bind_rows(Cluster_DEG_list)
    collapse$avg_logFC[collapse$avg_logFC < -logFCcollapse] <- -logFCcollapse
    collapse$avg_logFC[collapse$avg_logFC > logFCcollapse] <- logFCcollapse
    levs <- collapse$cluster %>% unique()
    collapse$cluster <- factor(collapse$cluster, levels = levs)

    p1 <- ggplot() +
      geom_point(data = subset(collapse, p_val_adj > padj.cutoff | abs(avg_logFC) < logFC.cutoff),
                 size = pt.size, aes(x = cluster, y = avg_logFC, color = "> .05"),
                 position = "jitter", alpha = pt.alpha) +
      geom_point(data = subset(collapse, p_val_adj <= padj.cutoff & abs(avg_logFC) >= logFC.cutoff),
                 size = pt.size, aes(x = cluster, y = avg_logFC, color = "<= .05"),
                 position = "jitter", alpha = pt.alpha) +
      scale_color_manual(name = "adjusted p val", values = c("black", "grey")) +
      theme_classic()
    if(xlab.italic == T){
      p1 <- p1 + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5, face = "italic"))
    } else {
      p1 <- p1 + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5))
    }
    return(p1)
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}

BellagioGeneSet <- function(Cluster_DEG_list, logFCcollapse = 10, features,
                            clusters = NULL, label.size = 2.5, label.alpha = .6,
                            padj.cutoff = NULL, pt.size = 1, label.nudge = 1,
                            gs.pt.size = 1, color.palette = "Spectral"){
  tryCatch({
    features <- capitalize(tolower(features))
    collapse <- bind_rows(Cluster_DEG_list)
    collapse$avg_logFC[collapse$avg_logFC < -logFCcollapse] <- -logFCcollapse
    collapse$avg_logFC[collapse$avg_logFC > logFCcollapse] <- logFCcollapse
    if(!is.null(clusters)){
      collapse <- filter(collapse, cluster %in% clusters)
    }
    levs <- collapse$cluster %>% unique()
    collapse$cluster <- factor(collapse$cluster, levels = levs)
    #ncolor <- length(unique(collapse$gene[collapse$gene %in% features]))
    data.sub <- filter(collapse, gene %in% features)
    if(!is.null(padj.cutoff)){
      data.sub <- filter(data.sub, p_val_adj <= padj.cutoff)
    }
    ncolor <- length(unique(data.sub$gene))
    mycolors <- grDevices::colorRampPalette(brewer.pal(8, color.palette))(ncolor)
    p1 <- ggplot() +
      geom_point(size = pt.size, data = collapse, aes(x = cluster, y = avg_logFC), position = position_jitter(height = 0, seed = 1), color = "gray") +
      geom_point(size = gs.pt.size, data = data.sub, aes(x = cluster, y = avg_logFC, color = gene), position = position_jitter(height = 0, seed = 1)) +
      scale_color_manual(values = mycolors) +
      theme_classic() +
      geom_text(data = data.sub, size = label.size, alpha = label.alpha, aes(x = cluster, y = avg_logFC, label = gene), position = position_nudge(y = label.nudge)) +
      theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5, face = "italic"))
    return(p1)
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}

#somehow the prepping of the rnkfile is producing rare duplicate gene names. need to fix
Cluster_GSEA <- function(Cluster_DEG_list, geneset, sig.snapshot = F,
                         minSize = 10, maxSize = 500, nperm = 10000,
                         nproc = 2){
  tryCatch({
    temp <- list()
    nclusters <- length(Cluster_DEG_list)
    geneset <- lapply(geneset, toupper)
    for(i in 1:nclusters){
      tryCatch({
        print(paste0("Calculating GSEA for cluster ", i))
        rnkfile <- data.frame(cbind(Cluster_DEG_list[[i]]$gene, Cluster_DEG_list[[i]]$avg_logFC), stringsAsFactors = F)
        colnames(rnkfile) <- c("ID", "t")
        rnkfile$t <- as.numeric(rnkfile$t)
        rnkfile <- rnkfile[complete.cases(rnkfile),]
        rnkfile[,1] = toupper(rnkfile[,1])
        rnkfile <- setNames(rnkfile$t, rnkfile$ID)
        temp[[i]] <- fgsea(geneset, rnkfile, minSize = minSize, maxSize = maxSize, nperm = nperm, nproc = nproc)
        temp[[i]]$cluster <- Cluster_DEG_list[[i]]$cluster[1]
        temp[[i]]$p.adj.adj <- p.adjust(temp[[i]]$pval, method = "BH", n = (nclusters * length(geneset)))
        rm(rnkfile)
      }, error=function(e){cat("ERROR :",conditionMessage(e), "\n", "No DEGs for cluster", i-1, "?", "\n")})
    }
    temp.sig <- dplyr::bind_rows(temp)
    temp.sig <- dplyr::filter(temp.sig, padj <= .05)

    if(sig.snapshot == F) {return(temp)} else {return(temp.sig)}
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}

GSEA_Mountain <- function(Cluster_DEG_list, cluster, geneset, gseaParam = 1, ticksSize = .2){
  tryCatch({
    rnkfile <- data.frame(cbind(Cluster_DEG_list[[cluster]]$gene, Cluster_DEG_list[[cluster]]$avg_logFC), stringsAsFactors = F)
    colnames(rnkfile) <- c("ID", "t")
    rnkfile$t <- as.numeric(rnkfile$t)
    rnkfile <- rnkfile[complete.cases(rnkfile),]
    rnkfile[,1] = toupper(rnkfile[,1])
    rnkfile <- setNames(rnkfile$t, rnkfile$ID)
    p1 <- plotEnrichment(pathway = geneset, stats = rnkfile, gseaParam = gseaParam, ticksSize = ticksSize)
    return(p1)
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}



Volcano_Plot_GS <- function(degfile, label_features, gene_column = "gene", logfc_column = "avg_logFC",
                            pvalue_column = "p_val", padj_column = "p_val_adj",
                            label_padj = .05, label_log2fc = 0, point_alpha = 1,
                            base_color = "gray", sig_color = "black", gs_color = "purple",
                            pt_size = 1, gs_size = 2, logFCcollapse = NULL, pval_collapse = NULL,
                            feature_label = T){
  tryCatch({
    if(!is.null(logFCcollapse)){
    degfile$avg_logFC[degfile$avg_logFC < -logFCcollapse] <- -logFCcollapse
    degfile$avg_logFC[degfile$avg_logFC > logFCcollapse] <- logFCcollapse
    }
    if(!is.null(pval_collapse)){
    degfile$p_val[degfile$p_val < pval_collapse] <- pval_collapse
    }
    label_features <- label_features %>% tolower() %>% capitalize()
    missing.features <- label_features[!(label_features %in% degfile[[gene_column]])]
    label.data <- filter(degfile, degfile[[gene_column]] %in% label_features)
    sig.data <- filter(degfile, abs(degfile[[logfc_column]]) > label_log2fc & degfile[[padj_column]] < label_padj)
    p1 <- ggplot(data = degfile, aes(x = degfile[[logfc_column]], y = -log10(degfile[[pvalue_column]]))) +
      geom_point(alpha = point_alpha, color = base_color, size = pt_size) +
      geom_point(alpha = point_alpha, size = pt_size, data = sig.data, color = sig_color, aes(x = sig.data[[logfc_column]], y = -log10(sig.data[[pvalue_column]]))) +
      theme_classic() +
      geom_point(size = gs_size, color = gs_color, data = label.data, aes(x = label.data[[logfc_column]], y = -log10(label.data[[pvalue_column]]))) +
      xlab("LogFC") +
      ylab("-Log10(p value)")
    if(feature_label == T){p1 <- p1 + ggrepel::geom_text_repel(segment.alpha = .5, color = gs_color, data = label.data, aes(x = label.data[[logfc_column]], y = -log10(label.data[[pvalue_column]]), label = label.data[[gene_column]]))
    }
    if(length(missing.features) > 0){
      warning(paste0("missing feature: ",  missing.features, " "))
    }
    return(p1)
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}

Volcano_Plot <- function(degfile, gene_column = "geneid", logfc_column = "log2fc",
                         pvalue_column = "pvalue", padj_column = "padj", color_by = "cutoffs",
                         cutoff_log2fc = 0, cutoff_padj = .05, point_alpha = 1){
  tryCatch({
    if(color_by == "cutoffs"){
      label.data <- filter(degfile, abs(degfile[[logfc_column]]) > cutoff_log2fc & degfile[[padj_column]] < cutoff_padj)
      p1 <- ggplot(data = degfile, aes(x = degfile[[logfc_column]], y = -log10(degfile[[pvalue_column]]))) +
        geom_point(alpha = point_alpha, color = "gray") +
        geom_point(alpha = point_alpha, data = label.data, color = "black", aes(x = label.data[[logfc_column]], y = -log10(label.data[[pvalue_column]]))) +
        geom_text_repel(data = label.data, color = "black", label = label.data[[gene_column]], aes(x = label.data[[logfc_column]], y = -log10(label.data[[pvalue_column]]))) +
        theme_classic() +
        xlab("LogFC") +
        ylab("-Log10(p value)")
    }else{
      p1 <- ggplot(data = degfile, aes(x = degfile[[logfc_column]], y = -log10(degfile[[pvalue_column]]), color = degfile[[color_by]])) +
        geom_point(alpha = point_alpha) +
        scale_color_gradient(name = color_by, trans = "log") +
        theme_classic() +
        xlab("LogFC") +
        ylab("-Log10(p value)")
    }
    return(p1)
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}

#is works? basic yes but not pathway filtering yet. maybe color pathways?
Cluster_GSEA_pvals <- function(GSEA_output, pathway = NULL){
  tryCatch({
    if(is.null(pathway)){
      bind <- bind_rows(GSEA_output)
      bind$cluster <- as.factor(bind$cluster)
      sig.values <- filter(bind, p.adj.adj <= .05)
      p1 <-  ggplot(data = bind, aes(x = reorder(bind$cluster, bind$padj), y = -log(pval))) +
        geom_point(color = "gray") +
        geom_point(data = sig.values, aes(x = reorder(sig.values$cluster, sig.values$padj), y = -log(pval)), color = "black") +
        theme_classic() +
        xlab("Cluster")
    }
    else{
      bind <- GSEA_output %>%
        bind_rows() %>%
        filter(pathway == pathway)
      bind$cluster <- as.factor(bind$cluster)
      sig.values <- filter(bind, p.adj.adj <= .05)
      p1 <-  ggplot(data = bind, aes(x = reorder(bind$cluster, bind$padj), y = -log(pval))) +
        geom_point(color = "gray") +
        geom_point(data = sig.values, aes(x = reorder(sig.values$cluster, sig.values$padj), y = -log(pval)), color = "black") +
        theme_classic() +
        xlab("Cluster")
    }
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
  return(p1)
}

MCsumm <- function(pval){
  if(pval >=.05){x <- "NS"}
  if(pval < .05 & pval >= .01){x <- "*"}
  if(pval < .01 & pval >= .001){x <- "**"}
  if(pval < .001 & pval >= .0001){x <- "***"}
  if(pval < .0001){x <- "****"}
  return(x)
}


Cluster_Marker_Violins <- function(object, marker.list, label.size = 1){
  tryCatch({
    num.clusters <- nlevels(object@active.ident)
    plotlist <- vector()
    for(i in 1:num.clusters){
      tryCatch({
        assign(paste0("n", i), VlnPlot(object = object, features = c(top_n(filter(marker.list, cluster == i-1), 1, avg_logFC)$gene),
                                       ncol = 1, pt.size = 0) + NoAxes() + NoLegend() + theme(plot.title = element_text(face = "bold.italic", size = label.size)))
        plotlist <- append(plotlist, paste0("n", i))
      }, error=function(e){
        cat("ERROR :",conditionMessage(e), "Cluster", i, "\n")
        plotlist <- plotlist[-i]})
    }
    p1 <- plot_grid(plotlist = mget(plotlist), ncol = 1)
    return(p1)
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
#this will likely work when you figure out how to give plot_grid a char vector
#for this i should add a statement where idents are converted back to 0:whatever in case i or someone renames clusters
Cluster_sig_DEGs <- function(Cluster_DEG_list, padj = .05){

  num.clusters <- length(Cluster_DEG_list)
  temp.sig <- list()
  for(i in 1:num.clusters){
    tryCatch({
      temp <- as.data.frame(Cluster_DEG_list[[i]])
      temp <- rownames_to_column(temp)
      temp <- temp %>% filter(p_val_adj < padj)
      temp.sig[[i]] <- temp
    }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
  }
  temp <- bind_rows(temp.sig)
  return(temp)

}
DEGs_UpDown_Plot <- function(Cluster_DEG_list, padj = .05, color.up = "#FFB81C", color.down = "#2774AE", return_table = F){
  num.clusters <- length(Cluster_DEG_list)
  temp.sig <- list()
  for(i in 1:num.clusters){
    tryCatch({
      temp <- as.data.frame(Cluster_DEG_list[[i]])
      temp <- rownames_to_column(temp)
      temp <- temp %>% filter(p_val_adj < padj)
      temp.sig[[i]] <- temp
    }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
  }


  updown <- data.frame("Cluster" = numeric(), "Up" = numeric(), "Down" = numeric())
  for(i in 1:num.clusters){
    updown[i, "Cluster"] <- i-1
    updown[i, "Up"] <- sum(temp.sig[[i]]$avg_logFC > 0)
    updown[i, "Down"] <- sum(temp.sig[[i]]$avg_logFC < 0)
  }

  updown <- gather(updown, key = "key", value = "value", Up:Down)
  updown$Cluster <- as.factor(updown$Cluster)

  p1 <- ggplot() +
    geom_col(data = subset(updown, key == "Up"), aes(x = Cluster, y = value), fill = color.up) +
    geom_col(data = subset(updown, key == "Down"), aes(x = Cluster, y = -value), fill = color.down) +
    geom_hline(yintercept = 0, color = "darkgrey", linetype = "dashed") +
    labs(y = "Up/Down regulated, adjp < .05", x = NULL, fill = NULL) +
    theme_classic()
  if(return_table == F){
  return(p1)} else {return(updown)}
}
#
DEG_similarity_plot <- function(Cluster_DEG_list_x, Cluster_DEG_list_y, cluster_name,
                                logFCcollapse = 10, data_shown = "all", label_data = "none",
                                pt.size = 1, pt.alpha = 1){
  tryCatch({
  data_1 <- Cluster_DEG_list_x[[cluster_name]] %>%
    select(gene, avg_logFC, p_val_adj)
  data_2 <- Cluster_DEG_list_y[[cluster_name]] %>%
    select(gene, avg_logFC, p_val_adj)
  data_use <- full_join(data_1, data_2, by = "gene")
  data_use$avg_logFC.x[data_use$avg_logFC.x < -logFCcollapse] <- -logFCcollapse
  data_use$avg_logFC.x[data_use$avg_logFC.x > logFCcollapse] <- logFCcollapse
  data_use$avg_logFC.y[data_use$avg_logFC.y < -logFCcollapse] <- -logFCcollapse
  data_use$avg_logFC.y[data_use$avg_logFC.y > logFCcollapse] <- logFCcollapse
  data_use$nacolor <- "Yes"
  data_use[complete.cases(data_use),]$nacolor <- "No"
  if(data_shown == "one_sig"){
    data_use <- data_use %>%
      filter(p_val_adj.x <= .05 | p_val_adj.y <= .05)
  }
  if(data_shown == "both_sig"){
    data_use <- data_use %>%
      filter(p_val_adj.x <= .05 & p_val_adj.y <= .05)
  }
  if(data_shown == "x_sig"){
    data_use <- data_use %>%
      filter(p_val_adj.x <= .05)
  }
  if(data_shown == "y_sig"){
    data_use <- data_use %>%
      filter(p_val_adj.y <= .05)
  }
  data_use[is.na(data_use)] <- 0
  print(paste0("Quadrant 1: ", nrow(data_use %>% filter(avg_logFC.y > 0 & avg_logFC.x > 0)), " genes"))
  print(paste0("Quadrant 2: ", nrow(data_use %>% filter(avg_logFC.y > 0 & avg_logFC.x < 0)), " genes"))
  print(paste0("Quadrant 3: ", nrow(data_use %>% filter(avg_logFC.y < 0 & avg_logFC.x < 0)), " genes"))
  print(paste0("Quadrant 4: ", nrow(data_use %>% filter(avg_logFC.y < 0 & avg_logFC.x > 0)), " genes"))
  crho <- cor.test(x = data_use$avg_logFC.x, y = data_use$avg_logFC.y, method = "spearman")
  ggplot(data = data_use, aes(x = avg_logFC.x, y = avg_logFC.y, color = nacolor)) +
    geom_point(alpha = pt.alpha, size = pt.size) +
    theme_classic() +
    xlim(-logFCcollapse,logFCcollapse) +
    ylim(-logFCcollapse,logFCcollapse) +
    ggtitle(paste0("Spearman Rho ", signif(crho$estimate, 3))) +
    labs(color = "NAs present?") +
    scale_color_manual(values = c("Black", "Gray"))
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}

DEG_similarity_table <- function(Cluster_DEG_list_x, Cluster_DEG_list_y, data_shown = "all"){

  clusters <- intersect(names(Cluster_DEG_list_x), names(Cluster_DEG_list_y))
  ret <- tibble()
  for(i in clusters){
  data_1 <- Cluster_DEG_list_x[[i]] %>%
    select(gene, avg_logFC, p_val_adj)
  data_2 <- Cluster_DEG_list_y[[i]] %>%
    select(gene, avg_logFC, p_val_adj)
  data_use <- full_join(data_1, data_2, by = "gene")
  if(data_shown == "one_sig"){
    data_use <- data_use %>%
      filter(p_val_adj.x <= .05 | p_val_adj.y <= .05)
  }
  if(data_shown == "both_sig"){
    data_use <- data_use %>%
      filter(p_val_adj.x <= .05 & p_val_adj.y <= .05)
  }
  if(data_shown == "x_sig"){
    data_use <- data_use %>%
      filter(p_val_adj.x <= .05)
  }
  if(data_shown == "y_sig"){
    data_use <- data_use %>%
      filter(p_val_adj.y <= .05)
  }
  data_use[is.na(data_use)] <- 0
  tryCatch({
  suppressWarnings(crho <- cor.test(x = data_use$avg_logFC.x,
                                    y = data_use$avg_logFC.y, method = "spearman"))
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
  temp <- tibble(i, crho$estimate, crho$p.value)
  ret <- bind_rows(ret, temp)
  }
  ret$padj <- p.adjust(ret$`crho$p.value`, method = "BH", n = length(ret$i))
  colnames(ret) <- c("Cluster", "Spearman Rho", "p val", "p adjusted")
  return(ret)
}

#this needs to be fixed because apparently you cant have two calls to geom_text. could probably rbind label.data 1 and 2, then color by defining column
Volcano_Plot_GS_notworks <- function(degfile, label_features.1, label_features.2 = NULL, gene_column = "geneid", logfc_column = "log2fc",
                                     pvalue_column = "pvalue", padj_column = "padj",
                                     label_padj = .05, point_alpha = 1){
  label.data.1 <- filter(degfile, degfile[[gene_column]] %in% label_features.1)
  sig.data <- filter(degfile, degfile[[padj_column]] < label_padj)
  if(is.null(label_features.2)){
    p1 <- ggplot(data = degfile, aes(x = degfile[[logfc_column]], y = -log10(degfile[[pvalue_column]]))) +
      geom_point(alpha = point_alpha, color = "#2774AE") +
      geom_point(alpha = point_alpha, data = sig.data, color = "#FFD100", aes(x = sig.data[[logfc_column]], y = -log10(sig.data[[pvalue_column]]))) +
      theme_classic() +
      geom_point(color = "purple", data = label.data.1, aes(x = label.data.1[[logfc_column]], y = -log10(label.data.1[[pvalue_column]]))) +
      geom_text(nudge_y = .25, data = label.data.1, aes(x = label.data.1[[logfc_column]], y = -log10(label.data.1[[pvalue_column]]), label = label.data.1[[gene_column]]))
  } else {
    label.data.2 <- filter(degfile, degfile[[gene_column]] %in% label_features.2)
    p1 <- ggplot(data = degfile, aes(x = degfile[[logfc_column]], y = -log10(degfile[[pvalue_column]]))) +
      geom_point(alpha = point_alpha, color = "#2774AE") +
      geom_point(alpha = point_alpha, data = sig.data, color = "#FFD100", aes(x = sig.data[[logfc_column]], y = -log10(sig.data[[pvalue_column]]))) +
      theme_classic() +
      geom_point(color = "purple", data = label.data.1, aes(x = label.data.1[[logfc_column]], y = -log10(label.data.1[[pvalue_column]]))) +
      geom_point(color = "darkgreen", data = label.data.2, aes(x = label.data.2[[logfc_column]], y = -log10(label.data.2[[pvalue_column]]))) +
      geom_text(color = "purple", nudge_y = .25, data = label.data.1, aes(x = label.data.1[[logfc_column]], y = -log10(label.data.1[[pvalue_column]]), label = label.data.1[[gene_column]]))
    geom_text(color = "darkgreen", nudge_y = .25, data = label.data.2, aes(x = label.data.2[[logfc_column]], y = -log10(label.data.2[[pvalue_column]]), label = label.data.2[[gene_column]]))
  }
  return(p1)
}
jevanveen/ratplots documentation built on Sept. 24, 2020, 11:42 a.m.