DEP: Differential Enrichment analysis of Proteomics data

Documented in plot_heatmap plot_single

#' Plot values for a protein of interest
#'
#' \code{plot_single} generates a barplot of a protein of interest.
#'
#' @param dep SummarizedExperiment,
#' Data object for which differentially enriched proteins are annotated
#' (output from \code{\link{test_diff}()} and \code{\link{add_rejections}()}).
#' @param proteins Character,
#' The name(s) of the protein(s) to plot.
#' @param type 'contrast' or 'centered',
#' The type of data scaling used for plotting.
#' Either the fold change ('contrast') or
#' the centered log2-intensity ('centered').
#' @param plot Logical(1),
#' If \code{TRUE} (default) the barplot is produced.
#' Otherwise (if \code{FALSE}), the summaries which the
#' barplot is based on are returned.
#' @return A barplot (generated by \code{\link[ggplot2]{ggplot}}).
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter, normalize and impute missing values
#' filt <- filter_missval(se, thr = 0)
#' norm <- normalize_vsn(filt)
#' imputed <- impute(norm, fun = "MinProb", q = 0.01)
#'
#' # Test for differentially expressed proteins
#' diff <- test_diff(imputed, "control", "Ctrl")
#' dep <- add_rejections(diff, alpha = 0.05, lfc = 1)
#'
#' # Plot single proteins
#' plot_single(dep, 'USP15')
#' plot_single(dep, 'USP15', 'centered')
#' plot_single(dep, c('USP15', 'CUL1'))
#' plot_single(dep, c('USP15', 'CUL1'), plot = FALSE)
#' @export
plot_single <- function(dep, proteins, type = c("contrast", "centered"), plot = TRUE) {
  # Show error if inputs are not the required classes
  assertthat::assert_that(inherits(dep, "SummarizedExperiment"),
    is.character(proteins),
    is.character(type),
    is.logical(plot),
    length(plot) == 1)

  # Show error if inputs do not contain required columns
  type <- match.arg(type)

  row_data <- rowData(dep, use.names = FALSE)

  if(any(!c("label", "condition", "replicate") %in% colnames(colData(dep)))) {
    stop("'label', 'condition' and/or 'replicate' columns are not present in '",
         deparse(substitute(dep)),
         "'\nRun make_se() or make_se_parse() to obtain the required columns",
         call. = FALSE)
  }
  if(length(grep("_p.adj|_diff", colnames(row_data))) < 1) {
    stop("'[contrast]_diff' and '[contrast]_p.adj' columns are not present in '",
         deparse(substitute(dep)),
         "'\nRun test_diff() to obtain the required columns",
         call. = FALSE)
  }
  if(!"name" %in% colnames(row_data)) {
    stop("'name' column not present in '",
         deparse(substitute(dep)),
         "'\nRun make_se() or make_se_parse() to obtain the required columns",
         call. = FALSE)
  }

  # Show error if an unvalid protein name is given
  if(all(!proteins %in% row_data$name)) {
    if(length(proteins) == 1) {
      rows <- grep(substr(proteins, 1, nchar(proteins) - 1),row_data$name)
      possibilities <- row_data$name[rows]
    } else {
      rows <- lapply(proteins, function(x)
        grep(substr(x, 1, nchar(x) - 1),row_data$name))
      possibilities <- row_data$name[unlist(rows)]
    }

    if(length(possibilities) > 0) {
      possibilities_msg <- paste0(
        "Do you mean: '",
        paste0(possibilities, collapse = "', '"),
        "'")
    } else {
      possibilities_msg <- NULL
    }
    stop("please run `plot_single()` with a valid protein names in the 'proteins' argument\n",
         possibilities_msg,
         call. = FALSE)
  }
  if(any(!proteins %in% row_data$name)) {
    proteins <- proteins[proteins %in% row_data$name]
    warning("Only used the following protein(s): '",
            paste0(proteins, collapse = "', '"),
            "'")
  }

  # Single protein
  subset <- dep[proteins]

  # Plot either the centered log-intensity values
  # per condition ('centered') or the average fold change of conditions
  # versus the control condition ('contrast') for a single protein
  if(type == "centered") {
    # Obtain protein-centered fold change values
    means <- rowMeans(assay(subset), na.rm = TRUE)
    df_reps <- data.frame(assay(subset) - means) %>%
      rownames_to_column() %>%
      gather(ID, val, -rowname) %>%
      left_join(., data.frame(colData(subset)), by = "ID")
    df_reps$replicate <- as.factor(df_reps$replicate)
    df <- df_reps %>%
      group_by(condition, rowname) %>%
      summarize(mean = mean(val, na.rm = TRUE),
        sd = sd(val, na.rm = TRUE),
        n = n()) %>%
      mutate(error = qnorm(0.975) * sd / sqrt(n),
             CI.L = mean - error,
             CI.R = mean + error) %>%
      as.data.frame()
    df$rowname <- parse_factor(df$rowname, levels = proteins)

    # Plot the centered intensity values for the replicates and the mean
    p <- ggplot(df, aes(condition, mean)) +
      geom_hline(yintercept = 0) +
      geom_col(colour = "black", fill = "grey") +
      geom_point(data = df_reps, aes(condition, val, col = replicate),
                 shape = 18, size = 5, position = position_dodge(width=0.3)) +
      geom_errorbar(aes(ymin = CI.L, ymax = CI.R), width = 0.3) +
      labs(x = "Baits",
           y = expression(log[2]~"Centered intensity"~"(\u00B195% CI)"),
           col = "Rep") +
      facet_wrap(~rowname) +
      theme_DEP2()
  }
  if(type == "contrast") {
    # Select values for a single protein
    df <- rowData(subset, use.names = FALSE) %>%
      data.frame() %>%
      select(name,
             ends_with("_diff"),
             ends_with("_CI.L"),
             ends_with("_CI.R")) %>%
      gather(var, val, -name) %>%
      mutate(contrast = gsub("_diff|_CI.L|_CI.R", "", var),
             var = gsub(".*_", "", var)) %>%
      spread(var, val)
    df$name <- parse_factor(df$name, levels = proteins)
    suffix <- get_suffix(df$contrast)
    if(length(suffix)) {df$contrast <- delete_suffix(df$contrast)}
    # Plot the average fold change of conditions versus the control condition
    p <- ggplot(df, aes(contrast, diff)) +
      geom_hline(yintercept = 0) +
      geom_col(colour = "black", fill = "grey") +
      geom_errorbar(aes(ymin = CI.L, ymax = CI.R), width = 0.3) +
      labs(x = suffix,
           y = expression(log[2]~"Fold change"~"(\u00B195% CI)")) +
      facet_wrap(~name) +
      theme_DEP2()
  }
  if(plot) {
    return(p)
  } else {
    if(type == "centered") {
      df <- df %>%
        select(rowname, condition, mean, CI.L, CI.R)
      colnames(df) <- c("protein", "condition",
        "log2_intensity", "CI.L", "CI.R")
    }
    if(type == "contrast") {
      df <- df %>%
        select(name, contrast, diff, CI.L, CI.R) %>%
        mutate(contrast = paste0(contrast, suffix))
      colnames(df) <- c("protein", "contrast",
        "log2_fold_change", "CI.L", "CI.R")
    }
    return(df)
  }
}

# Internal function to get ComplexHeatmap::HeatmapAnnotation object
get_annotation <- function(dep, indicate) {
  assertthat::assert_that(
    inherits(dep, "SummarizedExperiment"),
    is.character(indicate))

  # Check indicate columns
  col_data <- colData(dep) %>%
    as.data.frame()
  columns <- colnames(col_data)
  if(all(!indicate %in% columns)) {
    stop("'",
      paste0(indicate, collapse = "' and/or '"),
      "' column(s) is/are not present in ",
      deparse(substitute(dep)),
      ".\nValid columns are: '",
      paste(columns, collapse = "', '"),
      "'.",
      call. = FALSE)
  }
  if(any(!indicate %in% columns)) {
    indicate <- indicate[indicate %in% columns]
    warning("Only used the following indicate column(s): '",
      paste0(indicate, collapse = "', '"),
      "'")
  }

  # Get annotation
  anno <- select(col_data, indicate)

  # Annotation color
  names <- colnames(anno)
  anno_col <- vector(mode="list", length=length(names))
  names(anno_col) <- names
  for(i in names) {
    var = anno[[i]] %>% unique() %>% sort()
    if(length(var) == 1)
      cols <- c("black")
    if(length(var) == 2)
      cols <- c("orangered", "cornflowerblue")
    if(length(var) < 7 & length(var) > 2)
      cols <- RColorBrewer::brewer.pal(length(var), "Pastel1")
    if(length(var) > 7)
      cols <- RColorBrewer::brewer.pal(length(var), "Set3")
    names(cols) <- var
    anno_col[[i]] <-  cols
  }

  # HeatmapAnnotation object
  HeatmapAnnotation(df = anno,
    col = anno_col,
    show_annotation_name = TRUE)
}

#' Plot a heatmap
#'
#' \code{plot_heatmap} generates a heatmap of all significant proteins.
#'
#' @param dep SummarizedExperiment,
#' Data object for which differentially enriched proteins are annotated
#' (output from \code{\link{test_diff}()} and \code{\link{add_rejections}()}).
#' @param type 'contrast' or 'centered',
#' The type of data scaling used for plotting.
#' Either the fold change ('contrast') or
#' the centered log2-intensity ('centered').
#' @param kmeans Logical(1),
#' Whether or not to perform k-means clustering.
#' @param k Integer(1),
#' Sets the number of k-means clusters.
#' @param col_limit Integer(1),
#' Sets the outer limits of the color scale.
#' @param indicate Character,
#' Sets additional annotation on the top of the heatmap
#' based on columns from the experimental design (colData).
#' Only applicable to type = 'centered'.
#' @param clustering_distance "euclidean", "maximum", "manhattan", "canberra",
#' "binary", "minkowski", "pearson", "spearman", "kendall" or "gower",
#' Function used to calculate clustering distance (for proteins and samples).
#' Based on \code{\link[ComplexHeatmap]{Heatmap}}
#' and \code{\link[cluster]{daisy}}.
#' @param row_font_size Integer(1),
#' Sets the size of row labels.
#' @param col_font_size Integer(1),
#' Sets the size of column labels.
#' @param plot Logical(1),
#' If \code{TRUE} (default) the heatmap is produced.
#' Otherwise (if \code{FALSE}), the data which the
#' heatmap is based on are returned.
#' @param ... Additional arguments for Heatmap function as depicted in
#' \code{\link[ComplexHeatmap]{Heatmap}}
#' @return A heatmap (generated by \code{\link[ComplexHeatmap]{Heatmap}})
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter, normalize and impute missing values
#' filt <- filter_missval(se, thr = 0)
#' norm <- normalize_vsn(filt)
#' imputed <- impute(norm, fun = "MinProb", q = 0.01)
#'
#' # Test for differentially expressed proteins
#' diff <- test_diff(imputed, "control", "Ctrl")
#' dep <- add_rejections(diff, alpha = 0.05, lfc = 1)
#'
#' # Plot heatmap
#' plot_heatmap(dep)
#' plot_heatmap(dep, 'centered', kmeans = TRUE, k = 6, row_font_size = 3)
#' plot_heatmap(dep, 'contrast', col_limit = 10, row_font_size = 3)
#' @export
plot_heatmap <- function(dep, type = c("contrast", "centered"),
  kmeans = FALSE, k = 6,
  col_limit = 6, indicate = NULL,
  clustering_distance = c("euclidean", "maximum", "manhattan", "canberra",
    "binary", "minkowski", "pearson", "spearman", "kendall", "gower"),
  row_font_size = 6, col_font_size = 10, plot = TRUE, ...) {

  # Show error if inputs are not the required classes
  if(is.integer(k)) k <- as.numeric(k)
  if(is.integer(col_limit)) col_limit <- as.numeric(col_limit)
  if(is.integer(row_font_size)) row_font_size <- as.numeric(row_font_size)
  if(is.integer(col_font_size)) col_font_size <- as.numeric(col_font_size)
  assertthat::assert_that(inherits(dep, "SummarizedExperiment"),
    is.character(type),
    is.logical(kmeans),
    is.numeric(k),
    length(k) == 1,
    is.numeric(col_limit),
    length(col_limit) == 1,
    is.numeric(row_font_size),
    length(row_font_size) == 1,
    is.numeric(col_font_size),
    length(col_font_size) == 1,
    is.logical(plot),
    length(plot) == 1)

  # Show error if inputs do not contain required columns
  type <- match.arg(type)
  clustering_distance <- match.arg(clustering_distance)

  # Extract row and col data
  row_data <- rowData(dep, use.names = FALSE)
  col_data <- colData(dep) %>%
    as.data.frame()

  # Show error if inputs do not contain required columns
  if(any(!c("label", "condition", "replicate") %in% colnames(col_data))) {
    stop(paste0("'label', 'condition' and/or 'replicate' columns are not present in '",
      deparse(substitute(dep)), "'"),
      call. = FALSE)
  }
  if(length(grep("_diff", colnames(row_data))) < 1) {
    stop(paste0("'[contrast]_diff' columns are not present in '",
      deparse(substitute(dep)),
      "'.\nRun test_diff() to obtain the required columns."),
      call. = FALSE)
  }
  if(!"significant" %in% colnames(row_data)) {
    stop(paste0("'significant' column is not present in '",
      deparse(substitute(dep)),
      "'.\nRun add_rejections() to obtain the required column."),
      call. = FALSE)
  }

  # Heatmap annotation
  if(!is.null(indicate) & type == "contrast") {
    warning("Heatmap annotation only applicable for type = 'centered'",
      call. = FALSE)
  }
  if(!is.null(indicate) & type == "centered") {
    ha1 <- get_annotation(dep, indicate)
  } else {
    ha1 <- NULL
  }

  # Filter for significant proteins only
  filtered <- dep[row_data$significant, ]

  # Check for missing values
  if(any(is.na(assay(filtered)))) {
    warning("Missing values in '", deparse(substitute(dep)), "'. ",
      "Using clustering_distance = 'gower'",
      call. = FALSE)
    clustering_distance <- "gower"
    obs_NA <- TRUE
  } else {
    obs_NA <- FALSE
  }

  # Get centered intensity values ('centered')
  if(type == "centered") {
    rowData(filtered)$mean <- rowMeans(assay(filtered), na.rm = TRUE)
    df <- assay(filtered) - rowData(filtered, use.names = FALSE)$mean
  }
  # Get contrast fold changes ('contrast')
  if(type == "contrast") {
    df <- rowData(filtered, use.names = FALSE) %>%
      data.frame() %>%
      column_to_rownames(var = "name") %>%
      select(ends_with("_diff"))
    colnames(df) <-
      gsub("_diff", "", colnames(df)) %>%
      gsub("_vs_", " vs ", .)
    df <- as.matrix(df)
  }

  # Facultative kmeans clustering
  if(kmeans & obs_NA) {
    warning("Cannot perform kmeans clustering with missing values",
      call. = FALSE)
    kmeans <- FALSE
  }
  if(kmeans & !obs_NA) {
    set.seed(1)
    df_kmeans <- kmeans(df, k)
    if(type == "centered") {
      # Order the k-means clusters according to the maximum fold change
      # in all samples averaged over the proteins in the cluster
      order <- data.frame(df) %>%
        cbind(., cluster = df_kmeans$cluster) %>%
        mutate(row = apply(.[, seq_len(ncol(.) - 1)], 1, function(x) max(x))) %>%
        group_by(cluster) %>%
        summarize(index = sum(row)/n()) %>%
        arrange(desc(index)) %>%
        pull(cluster) %>%
        match(seq_len(k), .)
      df_kmeans$cluster <- order[df_kmeans$cluster]
    }
    if(type == "contrast") {
      # Order the k-means clusters according to their average fold change
      order <- data.frame(df) %>%
        cbind(df, cluster = df_kmeans$cluster) %>%
        gather(condition, diff, -cluster) %>%
        group_by(cluster) %>%
        summarize(row = mean(diff)) %>%
        arrange(desc(row)) %>%
        pull(cluster) %>%
        match(seq_len(k), .)
      df_kmeans$cluster <- order[df_kmeans$cluster]
    }
  }

  if(ncol(df) == 1) {
    col_clust = FALSE
  } else {
    col_clust = TRUE
  }
  if(nrow(df) == 1) {
    row_clust = FALSE
  } else {
    row_clust = TRUE
  }
  if(clustering_distance == "gower") {
    clustering_distance <- function(x) {
      dist <- cluster::daisy(x, metric = "gower")
      dist[is.na(dist)] <- max(dist, na.rm = TRUE)
      return(dist)
    }
  }

  # Legend info
  legend <- ifelse(type == "contrast",
    "log2 Fold change",
    "log2 Centered intensity")

  # Heatmap
  ht1 = Heatmap(df,
    col = circlize::colorRamp2(
      seq(-col_limit, col_limit, (col_limit/5)),
      rev(RColorBrewer::brewer.pal(11, "RdBu"))),
    split = if(kmeans) {df_kmeans$cluster} else {NULL},
    cluster_rows = col_clust,
    cluster_columns = row_clust,
    row_names_side = "left",
    column_names_side = "top",
    clustering_distance_rows = clustering_distance,
    clustering_distance_columns = clustering_distance,
    heatmap_legend_param = list(color_bar = "continuous",
      legend_direction = "horizontal",
      legend_width = unit(5, "cm"),
      title_position = "lefttop"),
    name = legend,
    row_names_gp = gpar(fontsize = row_font_size),
    column_names_gp = gpar(fontsize = col_font_size),
    top_annotation = ha1,
    ...)
  if(plot) {
    # Plot
    draw(ht1, heatmap_legend_side = "top")
  } else {
    # Return data.frame
    colnames(df) <- gsub(" ", "_", colnames(df))
    df <- df[, unlist(column_order(ht1))]
    if(kmeans) {
      df <- cbind(df, k = df_kmeans$cluster)
    }
    return <- df[unlist(row_order(ht1)),]
    data.frame(protein = row.names(return), return) %>%
      mutate(order = row_number())
  }
}

#' Volcano plot
#'
#' \code{plot_volcano} generates a volcano plot for a specified contrast.
#'
#' @param dep SummarizedExperiment,
#' Data object for which differentially enriched proteins are annotated
#' (output from \code{\link{test_diff}()} and \code{\link{add_rejections}()}).
#' @param contrast Character(1),
#' Specifies the contrast to plot.
#' @param label_size Integer(1),
#' Sets the size of name labels.
#' @param add_names Logical(1),
#' Whether or not to plot names.
#' @param adjusted Logical(1),
#' Whether or not to use adjusted p values.
#' @param plot Logical(1),
#' If \code{TRUE} (default) the volcano plot is produced.
#' Otherwise (if \code{FALSE}), the data which the
#' volcano plot is based on are returned.
#' @return A volcano plot (generated by \code{\link[ggplot2]{ggplot}})
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter, normalize and impute missing values
#' filt <- filter_missval(se, thr = 0)
#' norm <- normalize_vsn(filt)
#' imputed <- impute(norm, fun = "MinProb", q = 0.01)
#'
#' # Test for differentially expressed proteins
#' diff <- test_diff(imputed, "control", "Ctrl")
#' dep <- add_rejections(diff, alpha = 0.05, lfc = 1)
#'
#' # Plot volcano
#' plot_volcano(dep, 'Ubi6_vs_Ctrl', label_size = 5, add_names = TRUE)
#' plot_volcano(dep, 'Ubi6_vs_Ctrl', label_size = 5,
#'    add_names = TRUE, adjusted = TRUE)
#' plot_volcano(dep, 'Ubi6_vs_Ctrl', add_names = FALSE)
#' plot_volcano(dep, 'Ubi4_vs_Ctrl', label_size = 5, add_names = TRUE)
#' @export
plot_volcano <- function(dep, contrast, label_size = 3,
  add_names = TRUE, adjusted = FALSE, plot = TRUE) {
  # Show error if inputs are not the required classes
  if(is.integer(label_size)) label_size <- as.numeric(label_size)
  assertthat::assert_that(inherits(dep, "SummarizedExperiment"),
    is.character(contrast),
    length(contrast) == 1,
    is.numeric(label_size),
    length(label_size) == 1,
    is.logical(add_names),
    length(add_names) == 1,
    is.logical(adjusted),
    length(adjusted) == 1,
    is.logical(plot),
    length(plot) == 1)

  row_data <- rowData(dep, use.names = FALSE)

  # Show error if inputs do not contain required columns
  if(any(!c("name", "ID") %in% colnames(row_data))) {
    stop(paste0("'name' and/or 'ID' columns are not present in '",
                deparse(substitute(dep)),
                "'.\nRun make_unique() to obtain required columns."),
         call. = FALSE)
  }
  if(length(grep("_p.adj|_diff", colnames(row_data))) < 1) {
    stop(paste0("'[contrast]_diff' and '[contrast]_p.adj' columns are not present in '",
                deparse(substitute(dep)),
                "'.\nRun test_diff() to obtain the required columns."),
         call. = FALSE)
  }
  if(length(grep("_significant", colnames(row_data))) < 1) {
    stop(paste0("'[contrast]_significant' columns are not present in '",
                deparse(substitute(dep)),
                "'.\nRun add_rejections() to obtain the required columns."),
         call. = FALSE)
  }

  # Show error if an unvalid contrast is given
  if (length(grep(paste(contrast, "_diff", sep = ""),
                  colnames(row_data))) == 0) {
    valid_cntrsts <- row_data %>%
      data.frame() %>%
      select(ends_with("_diff")) %>%
      colnames(.) %>%
      gsub("_diff", "", .)
    valid_cntrsts_msg <- paste0("Valid contrasts are: '",
                                paste0(valid_cntrsts, collapse = "', '"),
                                "'")
    stop("Not a valid contrast, please run `plot_volcano()` with a valid contrast as argument\n",
         valid_cntrsts_msg,
         call. = FALSE)
  }

  # Generate a data.frame containing all info for the volcano plot
  diff <- grep(paste(contrast, "_diff", sep = ""),
               colnames(row_data))
  if(adjusted) {
    p_values <- grep(paste(contrast, "_p.adj", sep = ""),
                     colnames(row_data))
  } else {
    p_values <- grep(paste(contrast, "_p.val", sep = ""),
                     colnames(row_data))
  }
  signif <- grep(paste(contrast, "_significant", sep = ""),
                 colnames(row_data))
  df <- data.frame(x = row_data[, diff],
                   y = -log10(row_data[, p_values]),
                   significant = row_data[, signif],
                   name = row_data$name) %>%
    filter(!is.na(significant)) %>%
    arrange(significant)

  name1 <- gsub("_vs_.*", "", contrast)
  name2 <- gsub(".*_vs_", "", contrast)

  # Plot volcano with or without labels
  p <- ggplot(df, aes(x, y)) +
    geom_vline(xintercept = 0) +
    geom_point(aes(col = significant)) +
    geom_text(data = data.frame(), aes(x = c(Inf, -Inf),
                                       y = c(-Inf, -Inf),
                                       hjust = c(1, 0),
                                       vjust = c(-1, -1),
                                       label = c(name1, name2),
                                       size = 5,
                                       fontface = "bold")) +
    labs(title = contrast,
      x = expression(log[2]~"Fold change")) +
    theme_DEP1() +
    theme(legend.position = "none") +
    scale_color_manual(values = c("TRUE" = "black", "FALSE" = "grey"))
  if (add_names) {
    p <- p + ggrepel::geom_text_repel(data = filter(df, significant),
                                      aes(label = name),
                                      size = label_size,
                                      box.padding = unit(0.1, 'lines'),
                                      point.padding = unit(0.1, 'lines'),
                                      segment.size = 0.5)
  }
  if(adjusted) {
    p <- p + labs(y = expression(-log[10]~"Adjusted p-value"))
  } else {
    p <- p + labs(y = expression(-log[10]~"P-value"))
  }
  if(plot) {
    return(p)
  } else {
    df <- df %>%
      select(name, x, y, significant) %>%
      arrange(desc(x))
    colnames(df)[c(1,2,3)] <- c("protein", "log2_fold_change", "p_value_-log10")
    if(adjusted) {
      colnames(df)[3] <- "adjusted_p_value_-log10"
    }
    return(df)
  }
}