R/get_purity.R

Defines functions get_purity

Documented in get_purity

#' Calculate purity or tumor content samples
#'
#' Estimate the proportion of cancer cells in the admixture of cells
#' forming the tumor microenvironment or the fraction of ctDNA in cfDNA samples.
#'
#' @param tumor_table A matrix of beta-values
#' @param info_sites A data.frame reporting a set of informative sites
#' generated by \code{\link{find_informative_sites}} or
#' \code{\link{find_informative_regions}}.
#' @return A data.frame with purity scores.
#' @importFrom stats median
#' @export
#' @examples
#' purity <- get_purity(tumor_toy_data,
#'  info_sites=data.frame(Probe=c("CpG_00001","CpG_00010","CpG_00020",
#'      "CpG_00015","CpG_00030","CpG_00045"), Site_type=rep(c("hyper","hypo"), each=3)))
get_purity <- function(tumor_table, info_sites) {
    # check parameters
    assertthat::assert_that(is.matrix(tumor_table))
    assertthat::assert_that(is.data.frame(info_sites))

    message(sprintf("[%s] # Calculate purity #", Sys.time()))

    hyper_sites <- info_sites$Probe[info_sites$Site_type=="hyper"]
    hypo_sites  <- info_sites$Probe[info_sites$Site_type=="hypo"]
    hyper_sites <- intersect(hyper_sites, rownames(tumor_table))
    hypo_sites  <- intersect(hypo_sites, rownames(tumor_table))

    message(sprintf("- Using %i hyper- and %i hypo-methylated sites",
                    length(hyper_sites), length(hypo_sites)))
    beta_values <- rbind(tumor_table[hyper_sites,], 1-tumor_table[hypo_sites,])
    Purity <- apply(beta_values, 2, median, na.rm = TRUE)
    purity_df <- data.frame(Sample=colnames(tumor_table), Purity)
    message(sprintf("[%s] Done",  Sys.time()))
    return(purity_df)
}
cgplab/PAMES documentation built on Dec. 4, 2022, 6:35 p.m.