R/olink_bridgeability_plot.R
In OlinkAnalyze: Facilitate Analysis of Proteomic Data from Olink

Documented in olink_bridgeability_plot

#'Plots for each bridgeable assays between two platforms / products
#' Author : Amrita Kar
#'
#'Generates a combined plot per assay containing a violin and boxplot plot
#'  for IQR ranges ;
#'correlation plot of NPX values ;
#'a median count bar plot and
#'KS plots from 2 platforms
#'
#' @param data data frame of cross-product bridge normalized data generated by
#'  olink_normalization()
#' @param median_counts_threshold Numeric value to use for median counts
#'  threshold for the two platforms. Default is 150
#' @param min_count Numeric value to use for minimum counts
#'  for the two platforms. Default is 10
#'
#' @return An object of class "ggplot", 4 combined plots for each protein.
#' @export
#' @examples
#' \donttest{
#'
#'npx_ht <- OlinkAnalyze:::data_ht_small |>
#'  dplyr::filter(SampleType == "SAMPLE") |>
#'  dplyr::mutate(Project = "data1")
#'
#'npx_3072 <- OlinkAnalyze:::data_3k_small |>
#'  dplyr::filter(SampleType == "SAMPLE") |>
#'  dplyr::mutate(Project = "data2")
#'
#'overlapping_samples <- unique(intersect(npx_ht |>
#'                                          dplyr::distinct(SampleID) |>
#'                                          dplyr::pull(),
#'                                        npx_3072 |>
#'                                          dplyr::distinct(SampleID) |>
#'                                          dplyr::pull()))
#'
#'data <- OlinkAnalyze::olink_normalization(df1 = npx_ht,
#'                                          df2 = npx_3072,
#'                                          overlapping_samples_df1 =
#'                                            overlapping_samples,
#'                                          df1_project_nr = "Explore HT",
#'                                          df2_project_nr = "Explore 3072",
#'                                          reference_project = "Explore HT")
#'
#'olinkids <- unique(paste0(data$OlinkID,"_",data$Assay))
#'results <- olink_bridgeability_plot(data = data,
#'                           median_counts_threshold = 150,
#'                           min_count = 10)
#'names(results) <- olinkids
#'}

olink_bridgeability_plot <- function(data,
                            median_counts_threshold = 150,
                            min_count = 10) {

  if (!requireNamespace("ggpubr", quietly = TRUE)) {

    stop("These plots require the ggpubr package.
         Please install ggpubr before continuing.
            install.packages(\"ggpubr\")")
  }

  set.seed(seed = 1)

  out_plts <- list()

  ids <- data |>
    dplyr::select(OlinkID, .data[["BridgingRecommendation"]]) |> #HT OlinkID
    dplyr::distinct() |>
    dplyr::pull(OlinkID)

  # Adjusting the platform and add color green for bridgeable assays

  data <- data |>
    dplyr::filter(Count > min_count) |>
    dplyr::mutate(
      textcol = dplyr::if_else(stringr::str_detect( # nolint
        .data[["BridgingRecommendation"]], # nolint
        "No"), "#A61F04", "#00559E"))

  platforms <- unique(data$Project)

  if (length(platforms) != 2) {
    cli::cli_abort(
      message = paste0("Expect 2 platforms. ", length(platforms), "found.")
    )
  }

  # IQR/Range plot ----
  iqr_range_plt <- function(data = data, id = id) {

    col <- data |>
      dplyr::filter(OlinkID %in% id) |>
      dplyr::pull(.data[["textcol"]]) |>
      unique()

    iqr_range_plt <- data |>
      dplyr::filter(OlinkID %in% id) |>
      ggplot2::ggplot(ggplot2::aes(x = paste(Assay, OlinkID, sep = "\n"),
                                   y = NPX, fill = Project)) +
      ggplot2::geom_violin(alpha = 0.4,
                           position = ggplot2::position_nudge(x = 0),
                           width = 0.4) +
      ggplot2::geom_boxplot(width = .1, outlier.shape = NA, alpha = 0.4) +
      ggplot2::scale_fill_manual(values = c("#FF1F05", "#00C7E1")) +
      ggplot2::scale_color_manual(values = c("#FF1F05", "#00C7E1")) +
      ggplot2::labs(x = "Assay", y = "NPX Distribution", fill = "Platform: ") +
      ggplot2::guides(fill = ggplot2::guide_legend(nrow = 1, byrow = TRUE)) +
      OlinkAnalyze::set_plot_theme(font = "") +
      ggplot2::theme(axis.text.x = ggplot2::element_blank(),
                     strip.text = ggplot2::element_text(colour = col)) +
      ggplot2::facet_wrap(. ~ paste(Assay, OlinkID, sep = "\n"),
                          scales = "free")
    return(iqr_range_plt)
  }

  # Correlation plot ----
  r2_plt <- function(data = data, id = id) {

    data1 <- data |>
      dplyr::filter(Project %in% platforms[1],
                    OlinkID %in% id)

    data2 <- data |>
      dplyr::filter(Project %in% platforms[2],
                    OlinkID %in% id)

    data_wider <- data1 |>
      dplyr::select(SampleID, Assay, Count, OlinkID, UniProt, NPX,
                    .data[["textcol"]]) |>
      dplyr::inner_join(data2 |>
                          dplyr::select(SampleID, Assay, Count, OlinkID,
                                        UniProt, NPX, .data[["textcol"]]),
                        by = c("SampleID", "OlinkID", "Assay", "UniProt",
                               "textcol"),
                        suffix = c(platforms[1], platforms[2]))

    axis_names <- grep("NPX", colnames(data_wider), value = TRUE)

    r2_lm <- data_wider |>
      dplyr::mutate(r2_lm = stats::cor(x = .data[[axis_names[1]]],
                                       y = .data[[axis_names[2]]],
                                       use = "everything",
                                       method = "pearson") ^ 2) |>
      dplyr::pull(r2_lm) |>
      unique()
    r2_lm <- signif(r2_lm, 2)

    if (r2_lm < 0.2)
      caps <- paste0("Possibly bridging background to background")
    else caps <- ""

    col <- data_wider |>
      dplyr::filter(OlinkID %in% id) |>
      dplyr::pull(.data[["textcol"]]) |>
      unique()

    r2_plt <- data_wider |>
      ggplot2::ggplot(ggplot2::aes(x = .data[[axis_names[1]]],
                                   y = .data[[axis_names[2]]])) +
      ggplot2::geom_point(color = "blue", alpha = 0.4) +
      ggplot2::geom_smooth(method = "lm", color = "black") +
      ggpubr::stat_cor(method = "pearson",
                       ggplot2::aes(label =
                                      ggplot2::after_stat(.data[["rr.label"]])),
                       geom = "label") +
      OlinkAnalyze::set_plot_theme(font = "") +
      OlinkAnalyze::olink_color_discrete() +
      ggplot2::facet_wrap(. ~ paste(Assay, OlinkID, sep = "\n"),
                          scales = "free") +
      ggplot2::theme(strip.text = ggplot2::element_text(colour = col)) +
      ggplot2::labs(caption = caps)
    return(r2_plt)
  }

  # Median counts plot ----
  counts_plt <- function(data = data, median_counts_threshold,
                         id = id) {

    col <- data |>
      dplyr::filter(OlinkID %in% id) |>
      dplyr::pull(.data[["textcol"]]) |>
      unique()

    counts_plt <- data |>
      dplyr::filter(OlinkID %in% id) |>
      dplyr::group_by(OlinkID, Project) |>
      dplyr::mutate(median_count = median(Count, na.rm = TRUE)) |>
      dplyr::ungroup() |>
      dplyr::mutate(Assay = factor(paste(Assay, OlinkID, sep = "\n")),
                    Project = factor(Project)) |>
      dplyr::group_by(Assay, Project) |>
      dplyr::reframe(n = .data[["median_count"]]) |>
      dplyr::ungroup() |>
      dplyr::distinct() |>
      ggplot2::ggplot(ggplot2::aes(x = Assay, fill = Project, y = n,
                                   label = n)) +
      ggplot2::geom_col(width = 0.5, position = "dodge") +
      ggplot2::geom_text(position = ggplot2::position_dodge(0.5),
                         vjust = 0.5) +
      ggplot2::labs(y = "Median Count", fill = "Platform:") +
      OlinkAnalyze::set_plot_theme(font = "") +
      ggplot2::theme(axis.text.x = ggplot2::element_blank(),
                     strip.text = ggplot2::element_text(colour = col)) +
      ggplot2::facet_wrap(. ~ Assay, scales = "free") +
      ggplot2::geom_hline(yintercept = median_counts_threshold,
                          color = "#FF1F05", linewidth = 0.7)
    return(counts_plt)
  }

  # KS plot ----
  ks_plt <- function(data = data, id = id) {

    data1 <- data |>
      dplyr::filter(Project %in% platforms[1],
                    OlinkID %in% id)
    data2 <- data |>
      dplyr::filter(Project %in% platforms[2],
                    OlinkID %in% id)

    # Calculate empirical cumulative distribution function (ECDF) per platform
    ecdf_data1 <- stats::ecdf(data1$NPX)
    ecdf_data2 <- stats::ecdf(data2$NPX)

    # Find min/max statistics to draw line between points of greatest distance
    min_max <- seq(min(data1$NPX, data2$NPX), max(data1$NPX, data2$NPX),
                   length.out = length(data1$NPX))
    x0 <- min_max[which(abs(ecdf_data1(min_max) - ecdf_data2(min_max)) ==
                          max(abs(ecdf_data1(min_max) - ecdf_data2(min_max))))]
    y0 <- ecdf_data1(x0)
    y1 <- ecdf_data2(x0)

    ks_results <- stats::ks.test(data1$NPX, data2$NPX,
                                 alternative = "two.sided", exact = NULL,
                                 simulate.p.value = FALSE, B = 2000L)

    # Main KS plot construction
    ks_plt <- data |>
      dplyr::filter(OlinkID %in% id) |>
      ggplot2::ggplot(ggplot2::aes(x = NPX, group = Project, color = Project)) +
      ggplot2::scale_color_manual(values = c("#FF1F05", "#00C7E1")) +
      ggplot2::stat_ecdf(linewidth = 1) +
      ggplot2::theme_bw(base_size = 10) +
      ggplot2::theme(legend.position = "top") +
      ggplot2::labs(x = "NPX", y = "ECDF") +
      ggplot2::geom_segment(ggplot2::aes(x = x0[1], y = y0[1], xend = x0[1],
                                         yend = y1[1]),
                            linetype = "dashed", color = "black") +
      ggplot2::geom_point(ggplot2::aes(x = x0[1], y = y0[1]), color = "black",
                          size = 2) +
      ggplot2::geom_point(ggplot2::aes(x = x0[1], y = y1[1]), color = "black",
                          size = 2) +
      ggplot2::facet_wrap(. ~ paste(Assay, OlinkID, sep = "\n"),
                          scales = "free", nrow = 4, ncol = 5) +
      ggplot2::annotate("text", x = Inf, y = 0.1, hjust = 1,
                        cex = 4,
                        label = paste0("D = ", signif(ks_results$statistic,
                                                      2))) +
      ggplot2::theme(legend.title = ggplot2::element_blank()) +
      OlinkAnalyze::set_plot_theme(font = "") +
      ggplot2::labs(color = "Platform:")
    return(ks_plt)
  }

  # Bridgeable plot ----
  for (i in seq_along(ids)){
    final_plts <- list()

    message(paste0("Working on OlinkID : ", i))
    final_plts[["iqr"]] <- iqr_range_plt(data = data, id = ids[i])
    message("IQR plot done")
    final_plts[["r2"]] <- r2_plt(data = data, id = ids[i])
    message("R2 plot done")
    final_plts[["counts"]] <- counts_plt(data = data,
                                         median_counts_threshold, id = ids[i])
    message("Counts plot done")
    final_plts[["ks"]] <- ks_plt(data = data, id = ids[i])
    message("KS plot done")

    out1 <- ggpubr::ggarrange(final_plts$iqr, final_plts$counts,
                              nrow = 1, ncol = 2, legend = "top",
                              common.legend = TRUE)

    out2 <- ggpubr::ggarrange(final_plts$r2, out1,
                              nrow = 1, ncol = 2, legend = "right",
                              common.legend = FALSE)

    out <- ggpubr::ggarrange(out2, final_plts$ks,
                             nrow = 2, ncol = 1, legend = "top",
                             common.legend = FALSE)

    rm(out1, out2)

    out_plts[[i]] <- out
    rm(out, final_plts)
  }

  return(out_plts)
}