R/mathScore.R

Defines functions mathScore

Documented in mathScore

#' mathScore
#' @description calculates MATH score of each tumor sample or based on Mutant-Allele Tumor Heterogeneity (MATH) approach.
#' 
#' @references Mroz, Edmund A. et al. Intra-Tumor Genetic Heterogeneity and Mortality in Head and Neck Cancer: Analysis of Data from The Cancer Genome Atlas. Ed. Andrew H. Beck. PLoS Medicine 12.2 (2015): e1001786.
#' 
#' @param maf Maf or MafList object generated by \code{\link{readMaf}} function.
#' @param patient.id Select the specific patients. Default NULL, all patients are included.
#' @param withinTumor Calculate between-region heterogeneity within tumor. Default: FALSE.
#' @param min.vaf Specify The minimum VAF to filter variants. Default: 0.
#' @param use.adjVAF Use adjusted VAF in analysis when adjusted VAF or CCF is available. Default: FALSE. 
#' @param segFile The segment file.
#' @param use.tumorSampleLabel Logical (Default: FALSE). 
#' Rename the 'Tumor_Sample_Barcode' by 'Tumor_Sample_Label'.
#' @param ... Other options passed to \code{\link{subMaf}}
#' @return A data.frame of MATH scores
#' 
#' @examples
#' maf.File <- system.file("extdata/", "CRC_HZ.maf", package = "MesKit")
#' clin.File <- system.file("extdata/", "CRC_HZ.clin.txt", package = "MesKit")
#' ccf.File <- system.file("extdata/", "CRC_HZ.ccf.tsv", package = "MesKit")
#' maf <- readMaf(mafFile=maf.File, clinicalFile = clin.File, ccfFile=ccf.File, refBuild="hg19")
#' mathScore(maf)
#' 
#' @importFrom stats median
#' @export mathScore

## MATH Score main function
mathScore <- function(maf,
                      patient.id = NULL,
                      withinTumor = FALSE,
                      min.vaf = 0,
                      use.adjVAF = FALSE,
                      segFile = NULL,
                      use.tumorSampleLabel = FALSE,
                      ...
                      ){

    seg <- NULL
    if(!is.null(segFile)){
        seg <- readSegment(segFile)
    }
    processMATH <- function(m, seg){
        
        if(withinTumor){
            id_col <- "Tumor_ID"
            vaf_col <- "Tumor_Average_VAF"
        }else{
            id_col <- "Tumor_Sample_Barcode"
            vaf_col <- "VAF"
        }
        
        patient <- getMafPatient(m)
        maf_data <- getMafData(m)
        if(nrow(maf_data) == 0){
            message("Warning: there was no mutation in ", patient, " after filtering.")
            return(NA)
        }
        

        
        ## MATH Caculation
        calMATH <- function(subdata, seg, use.tumorSampleLabel){
            
            id <- unique(subdata[[id_col]])
            patient <- unique(subdata$Patient_ID)
            # message(paste("Calculating MATH score for ", id," of ", patient, sep = ""))
            
            if(!is.null(seg)){
                subdata <- copyNumberFilter(subdata,
                                            seg = seg, 
                                            use.tumorSampleLabel = use.tumorSampleLabel)
            }
            
            VAF <- subdata[[vaf_col]]
            VAF = VAF[!is.na(VAF)] 
            
            MAD <- 1.4826 * stats::median(abs(VAF - stats::median(VAF)))
            MATH <- 100 * MAD / stats::median(VAF)
            MATH <- round(MATH, digits=3)
            
            MATH.df <- data.frame(Patient_ID = unique(subdata$Patient_ID),
                                  id = id,
                                  MATH_Score = MATH)
            colnames(MATH.df) <- c("Patient_ID", id_col, "MATH_Score")
            return(MATH.df)
        }
        
        if(withinTumor){
            if(! "CCF" %in% colnames(maf_data)){
                stop(paste0("Calculation of MATH score requires CCF data when withinTumor is TRUE."))
            }
        }
        
        
        MATH.df <- maf_data %>% 
            dplyr::group_by(.data$Patient_ID, .data[[id_col]]) %>%
            dplyr::group_map(~calMATH(., seg = seg, use.tumorSampleLabel = use.tumorSampleLabel), .keep = TRUE) %>% 
            dplyr::bind_rows()
        
        return(MATH.df)
    }
    
    ## select subclonal mutation when withinTumor is TRUE
    if(withinTumor){
        clonalStatus <- "Subclonal"
    }else{
        clonalStatus <- NULL
    }
    
    maf_input <- subMaf(maf,
                        patient.id = patient.id,
                        min.vaf = min.vaf, 
                        use.adjVAF = use.adjVAF,
                        clonalStatus = clonalStatus,
                        use.tumorSampleLabel = use.tumorSampleLabel,
                        mafObj = TRUE, ...)
    
    MATH_list <- lapply(maf_input, processMATH, seg = seg)
    result <- MATH_list[!is.na(MATH_list)]
    # result <- dplyr::bind_rows(MATH_list)
    
    #y.limits <- c(
        #floor(min(MATH.df$MATH_Score)-15),
        #ceiling(max(MATH.df$MATH_Score)+15)
        ##ifelse(max(MATH.df$MATH_Score)+15 >100, 100, max(MATH.df$MATH_Score)+15)
    #)
    
    # # violin plot
    # if(plot){
    #     p <- ggplot(MATH.df, aes(x=as.factor(Patient_ID), y=MATH_Score, fill = Patient_ID)) + 
    #         geom_violin() + theme_bw() +     
    #         #ylim(y.limits) + 
    #         theme(legend.title = element_blank(),
    #         legend.text = element_text(size = 11),
    #         legend.key.size = unit(0.4, "cm"),
    #         panel.border = element_blank(), 
    #         panel.grid.major = element_line(linetype = 2, color = "grey"),
    #         panel.grid.minor = element_blank(),
    #         axis.line=element_line(color= "black", size= 1),
    #         axis.line.y = element_blank(),
    #         #axis.line.y = element_line(size=0.5, colour = "black"),
    #         #axis.ticks.y = element_line(size=0.5, colour = "black"),
    #         axis.line.x = element_blank(),
    #         axis.title = element_text(size = 12),
    #         axis.ticks.x = element_blank(),
    #         axis.text.x = element_text(size = 11, color = "black", angle = 90),
    #         axis.text.y = element_text(size = 11, color = "black")) + 
    #         scale_y_continuous(limits = y.limits, expand = c(0,0),
    #             breaks = round(seq(y.limits[1], y.limits[2], length = 5)))+
    #         #scale_y_continuous(breaks = c(y.limits[1], 40, 50, 60, 70, 80, 90, y.limits[2]), 
    #                            #limits = y.limits, labels = c(y.limits[1], 40,50,60,70,80,90,y.limits[2]),
    #                            #expand = c(0,0)) +
    #         scale_x_discrete(limits = levels(MATH.df$Patient_ID)) +
    #         labs(y = "MATH score", x = "") +
    #         annotate("segment", x = 0, xend = 0, y = y.limits[1], yend = y.limits[2], size = 0.6)      
    # }else{
    #     p <- NULL
    # }
    # 
    # return(list(MATH.df = MATH.df, MATH.plot = p))  
    if(length(result) == 0){
        return(NA)
    }
    return(result)   
    
}

Try the MesKit package in your browser

Any scripts or data that you put into this service are public.

MesKit documentation built on March 28, 2021, 6 p.m.