R/calRoutines.R

Defines functions calRoutines

Documented in calRoutines

#' calRoutines calculates the H index and JSI index according to the pair-wise comparison of CCF
#' @param maf Maf or MafList object generated by [MesKit::readMaf()] function.
#' @param PrimaryId Primary tumor IDs to indicate the primary-metastases relationships.
#' @param patient.id Select the specific patients. Default NULL, all patients are included.
#' @param pairByTumor Compare JSI between different tumors. Default TRUE.
#' @param use.tumorSampleLabel Logical (Default: FALSE). Rename the 'Tumor_Sample_Barcode' by 'Tumor_Sample_Label'.
#' @param CCF_cutoff The minimal cutoffs for the present status. The mutations with CCF smaller than CCF_cutoff were consider as absent statuses.
#' @param maf_drivers Driver information. Two columns are required, including "Mut_ID" and "is.driver". If provided, add the driver mutations in the plot. "Mut_ID" = str_c(Chromosome, Start_Position, Reference_Allele , Tumor_Seq_Allele2, sep = ":")
#' @param subtitle the information shows in the subtitle. Options including "JSI", "Hindex", "both" and "none". Default is "both"
#' @param ... Other options passed to [MesKit::subMaf()].
#' @references
#' 1. H index. Hu, Z., et al., Quantitative evidence for early metastatic seeding in colorectal cancer. Nat Genet, 2019. 51(7): p. 1113-1122.
#' 2. JSI index. Hu, Z., et al., Multi-cancer analysis of clonality and the timing of systemic spread in paired primary tumors and metastases. Nat Genet, 2020. 52(7): p. 701-708.
#'
#' @examples
#' library(MesKit)
#' data.type <- "split1"
#'
#' maf <- readMaf(
#'   mafFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.mutation.txt", data.type)),
#'   ccfFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.CCF.txt", data.type)),
#'   clinicalFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.clinical.txt", data.type)),
#'   refBuild = "hg19",
#'   ccf.conf.level = 0.95
#' )
#' cal <- calRoutines(
#'   maf = maf,
#'   patient.id = "Met1",
#'   PrimaryId = "Coad",
#'   pairByTumor = TRUE,
#'   use.tumorSampleLabel = TRUE,
#'   subtitle = "both"
#' )
#'
#' plot_grid(plotlist = cal$Met1$plist, nrow = 1)
#' ## add driver information
#' maf_driver <- data.frame(
#'   Mut_ID = c("5:112170777:CAGA:-", "1:147092680:-:C"),
#'   is.driver = c(TRUE, TRUE)
#' )
#' cal <- calRoutines(
#'   maf = maf,
#'   patient.id = "Met1",
#'   PrimaryId = "Coad",
#'   pairByTumor = TRUE,
#'   use.tumorSampleLabel = TRUE,
#'   subtitle = "both",
#'   maf_drivers = maf_driver
#' )
#'
#' plot_grid(plotlist = cal$Met1$plist, nrow = 1)
#' @import ggrepel
#'
#' @export
calRoutines <- function(maf,
                        PrimaryId,
                        patient.id = NULL,
                        CCF_cutoff = 0.10,
                        pairByTumor = TRUE,
                        use.tumorSampleLabel = FALSE,
                        maf_drivers = NULL,
                        subtitle = "both",
                        ...) {
  if (!is.null(maf_drivers)) {
    if (!all(c("Mut_ID", "is.driver") %in% colnames(maf_drivers))) {
      stop(message("check the column names of maf_drivers"))
    }
  }

  # Get the JSI Index
  JSI.list <- calJSI(maf,
    patient.id = patient.id,
    pairByTumor = pairByTumor,
    use.tumorSampleLabel = use.tumorSampleLabel,
    ...
  )
  names(JSI.list)

  # Get the pair-wise CCF comparison
  ccf.list <- compareCCF(maf,
    patient.id = patient.id,
    pairByTumor = pairByTumor,
    ...
  )

  processPairCCF <- function(patient.id, ccf.list, JSI.list) {
    if (!is.null(patient.id)) {
      ccf <- ccf.list[[patient.id]]

      if ("JSI.multi" %in% names(JSI.list)) {
        JSI <- JSI.list
      } else {
        JSI <- JSI.list[[patient.id]]
      }
    } else {
      ccf <- ccf.list
      JSI <- JSI.list
    }


    plist <- list()
    sample.pairs <- names(ccf)[grepl(PrimaryId, names(ccf))]
    stats <- list()

    for (i in sample.pairs) {
      nums <- c()
      message(sprintf("%s: %s", patient.id, i))
      pairs <- as.character(stringr::str_split(i, "-", simplify = T))
      pairs <- c(PrimaryId, pairs[which(pairs != PrimaryId)])
      JSI.value <- JSI$JSI.pair[pairs[1], pairs[2]]

      # merge driver info
      if (is.null(maf_drivers)) {
        Met1_C_OL <- ccf[[i]]
      } else {
        Met1_C_OL <- ccf[[i]] %>%
          dplyr::left_join(maf_drivers %>%
            dplyr::select(Mut_ID, is.driver) %>%
            unique())
      }


      Met1_group <- Met1_C_OL %>%
        dplyr::select(all_of(c(PrimaryId, pairs[!pairs %in% PrimaryId]))) %>%
        setNames(nm = c("Primary", "Metastases")) %>%
        # Set Lp and Lm
        dplyr::mutate(
          mut_group = ifelse(Primary >= CCF_cutoff & Metastases < CCF_cutoff, "Lp",
            ifelse(Metastases >= CCF_cutoff & Primary < CCF_cutoff, "Lm",
              ifelse(Primary >= CCF_cutoff & Metastases >= CCF_cutoff, "La", "Ls")
            )
          )
        ) %>%
        dplyr::mutate(
          S1 = ifelse(mut_group == "Lp" & Primary >= 0.1, 1, 0),
          S2 = ifelse(mut_group == "Lp" & Primary >= 0.2, 1, 0),
          S3 = ifelse(mut_group == "Lp" & Primary >= 0.4, 1, 0),
          S4 = ifelse(mut_group == "Lp" & Primary >= 0.6, 1, 0),
          S5 = ifelse(mut_group == "Lm" & Metastases >= 0.1, 1, 0),
          S6 = ifelse(mut_group == "Lm" & Metastases >= 0.2, 1, 0),
          S7 = ifelse(mut_group == "Lm" & Metastases >= 0.4, 1, 0),
          S8 = ifelse(mut_group == "Lm" & Metastases >= 0.6, 1, 0),
          S9 = ifelse(Metastases > 0.6 & Primary < 0.6 & Primary > CCF_cutoff, 1, 0)
        )

      nums[1] <- sum(Met1_group$mut_group == "Lp")
      nums[2] <- sum(Met1_group$mut_group == "Lm")
      nums[3] <- sum(Met1_group$mut_group == "La")
      nums[4] <- sum(Met1_group$mut_group == "Ls")
      nums[5] <- round(JSI.value, 3)

      Hindex <- nums[2] / (nums[1] + 1)

      nums[6] <- round(Hindex, 3)
      nums <- setNames(nums, c("Lp", "Lm", "La", "Ls", "JSI", "Hindex"))

      nums <- c(
        nums,
        Met1_group %>%
          dplyr::select(dplyr::all_of(paste0("S", 1:9))) %>%
          colSums()
      )

      stats[[stringr::str_c(pairs, collapse = "-")]] <- nums

      p1 <- Met1_C_OL %>%
        ggplot2::ggplot(ggplot2::aes_string(x = pairs[1], y = pairs[2])) +
        ggpubr::theme_classic2() +
        ggplot2::stat_density2d(ggplot2::aes(fill = ..density..^2),
          geom = "tile", contour = FALSE, n = 100
        ) +
        ggplot2::scale_fill_continuous(low = "white", high = "red") +
        ggplot2::geom_hline(yintercept = CCF_cutoff, linetype = 2, size = 0.5) +
        ggplot2::geom_vline(xintercept = CCF_cutoff, linetype = 2, size = 0.5) +
        ggplot2::geom_abline(slope = 1, intercept = 0, linetype = 3, size = 0.2, alpha = 0.5) +
        ggplot2::geom_point(alpha = 0.5) +
        ggplot2::xlim(0, 1) +
        ggplot2::ylim(0, 1) +
        ggplot2::labs(title = patient.id) +
        ggplot2::theme(
          legend.position = "none",
          plot.title = ggplot2::element_text(hjust = 0.5),
          plot.subtitle = ggplot2::element_text(hjust = 0.5)
        )

      if (subtitle == "both") {
        p1 <- p1 +
          ggplot2::labs(subtitle = sprintf("JSI = %.3f, Hindex = %.3f", JSI.value, Hindex))
      } else if (subtitle == "JSI") {
        p1 <- p1 +
          ggplot2::labs(subtitle = sprintf("JSI = %.3f", JSI.value))
      } else if (subtitle == "Hindex") {
        p1 <- p1 +
          ggplot2::labs(subtitle = sprintf("Hindex = %.3f", Hindex))
      }

      if (!is.null(maf_drivers)) {
        p1 <- p1 +
          ggplot2::geom_point(col = "blue", data = Met1_C_OL %>%
            dplyr::filter(is.driver), size = 3, shape = 5) +
          ggrepel::geom_label_repel(ggplot2::aes(label = Hugo_Symbol),
            data = Met1_C_OL %>%
              dplyr::filter(is.driver),
            nudge_x = 0.1, nudge_y = 0.1
          )
      }

      plist[[stringr::str_c(pairs, collapse = "-")]] <- p1
    }

    return(list(
      plist = plist,
      stats = stats
    ))
  }

  if (is.null(patient.id)) {
    patient.id <- names(maf)
  }

  result <- lapply(patient.id, processPairCCF, ccf.list, JSI.list)

  result <- result[!is.na(result)]
  names(result) <- patient.id

  if (length(result) == 0) {
    return(NA)
  }
  return(result)
}
qingjian1991/MPTevol documentation built on Jan. 30, 2023, 10:16 p.m.