R/visualization.R

Defines functions plotROC GenePathwayOncoplots plotKMcurves

Documented in GenePathwayOncoplots plotKMcurves plotROC

#' @title Exact tests to detect mutually exclusive, co-occuring and altered genesets or pathways.
#' @description Performs Pair-wise Fisher's Exact test to detect mutually exclusive or co-occuring events.
#' @param freq_matrix The mutations matrix,generated by `get_mut_matrix`.
#' @param genes List of genes or pathways among which interactions should be tested.
#' @param pvalue Default c(0.05, 0.01) p-value threshold. You can provide two values for upper and lower threshold.
#' @param returnAll If TRUE returns test statistics for all pair of tested genes. Default FALSE, returns for only genes below pvalue threshold.
#' @param fontSize cex for gene names. Default 0.8.
#' @param showSigSymbols Default TRUE. Heighlight significant pairs.
#' @param showCounts Default TRUE. Include number of events in the plot.
#' @param countStats Default 'all'. Can be 'all' or 'sig'.
#' @param countType Default 'cooccur'. Can be 'all', 'cooccur', 'mutexcl'.
#' @param countsFontSize Default 0.8.
#' @param countsFontColor Default 'black'.
#' @param colPal colPalBrewer palettes. See RColorBrewer::display.brewer.all() for details.
#' @param nShiftSymbols shift if positive shift SigSymbols by n to the left, default = 5.
#' @param sigSymbolsSize size of symbols in the matrix and in legend.
#' @param sigSymbolsFontSize size of font in legends.
#' @param pvSymbols vector of pch numbers for symbols of p-value for upper and lower thresholds c(upper, lower).
#' @param limitColorBreaks limit color to extreme values. Default TRUE.
#' @importFrom stats fisher.test
#' @importFrom data.table rbindlist
#' @importFrom data.table data.table
#' @importFrom RColorBrewer brewer.pal
#' @importFrom grDevices colorRampPalette
#' @importFrom graphics par
#' @importFrom graphics mtext
#' @importFrom graphics axis
#' @importFrom graphics points
#' @importFrom graphics text
#' @importFrom graphics abline
#' @importFrom BiocGenerics image
#' @return list of data.tables
#' @export
#' @examples
#' #get the path of the mutation annotation file and samples' survival data
#' maf<-system.file("extdata","data_mutations_extended.txt",package = "pathwayTMB")
#' sur_path<-system.file("extdata","sur.csv",package = "pathwayTMB")
#' sur<-read.csv(sur_path,header=TRUE,row.names = 1)
#' #perform the function 'get_mut_matrix'
#' \donttest{mut_matrix<-get_mut_matrix(maffile=maf,mut_fre = 0.01,is.TCGA=FALSE,sur=sur)
#' #perform the function `get_PTMB`
#' PTMB_matrix<-get_PTMB(freq_matrix=mut_matrix,genesmbol=genesmbol,gene_path=gene_path)
#' set.seed(1)
#' final_character<-get_final_signature(PTMB=PTMB_matrix,sur=sur)
#' plotMutInteract(freq_matrix=PTMB_matrix, genes=final_character,nShiftSymbols =0.3)}

plotMutInteract<-function (freq_matrix, genes, pvalue = c(0.05, 0.01),returnAll = TRUE,fontSize = 0.8, showSigSymbols = TRUE,showCounts = FALSE, countStats = "all", countType = "all",
                           countsFontSize = 0.8, countsFontColor = "black", colPal = "BrBG",nShiftSymbols = 5, sigSymbolsSize = 2,sigSymbolsFontSize = 0.9, pvSymbols = c(46, 42), limitColorBreaks = TRUE){
  all.tsbs = colnames(freq_matrix)
  mutMat = t(freq_matrix[genes,])
  missing.tsbs = all.tsbs[!all.tsbs %in% rownames(mutMat)]
  if (nrow(mutMat) < 2) {
    stop("Minimum two genes required!")
  }
  mutMat[mutMat > 0] = 1

  interactions = sapply(1:ncol(mutMat), function(i) sapply(1:ncol(mutMat),
                                                           function(j) {
                                                             f <- try(fisher.test(mutMat[, i], mutMat[, j]), silent = TRUE)
                                                             if (inherits(f,"try-error"))
                                                               NA
                                                             else ifelse(f$estimate > 1, -log10(f$p.val), log10(f$p.val))
                                                           }))
  oddsRatio <- oddsGenes <- sapply(1:ncol(mutMat), function(i) sapply(1:ncol(mutMat),
                                                                      function(j) {
                                                                        f <- try(fisher.test(mutMat[, i], mutMat[, j]), silent = TRUE)
                                                                        if (inherits(f,"try-error"))
                                                                          f = NA
                                                                        else f$estimate
                                                                      }))
  rownames(interactions) = colnames(interactions) = rownames(oddsRatio) = colnames(oddsRatio) = colnames(mutMat)
  sigPairs = which(x = 10^-abs(interactions) < 1, arr.ind = TRUE)
  sigPairs2 = which(x = 10^-abs(interactions) >= 1, arr.ind = TRUE)
  if (nrow(sigPairs) < 1) {
    stop("No meaningful interactions found.")
  }
  sigPairs = rbind(sigPairs, sigPairs2)
  sigPairsTbl = data.table::rbindlist(lapply(X = seq_along(1:nrow(sigPairs)),
                                             function(i) {
                                               x = sigPairs[i, ]
                                               g1 = rownames(interactions[x[1], x[2], drop = FALSE])
                                               g2 = colnames(interactions[x[1], x[2], drop = FALSE])
                                               tbl = as.data.frame(table(apply(X = mutMat[, c(g1,
                                                                                              g2), drop = FALSE], 1, paste, collapse = "")))
                                               combn = data.frame(t(tbl$Freq))
                                               colnames(combn) = tbl$Var1
                                               pval = 10^-abs(interactions[x[1], x[2]])
                                               fest = oddsRatio[x[1], x[2]]
                                               d = data.table::data.table(gene1 = g1, gene2 = g2,
                                                                          pValue = pval, oddsRatio = fest)
                                               d = cbind(d, combn)
                                               d
                                             }), fill = TRUE)
  sigPairsTbl<-sigPairsTbl[-which(sigPairsTbl$gene1==sigPairsTbl$gene2),]
  sigPairsTbl[is.na(sigPairsTbl)] = 0
  sigPairsTbl$Event = ifelse(test = sigPairsTbl$oddsRatio >
                               1, yes = "Co_Occurence", no = "Mutually_Exclusive")
  sigPairsTbl$pair = apply(X = sigPairsTbl[, c("gene1", "gene2")],
                           MARGIN = 1, FUN = function(x) paste(sort(unique(x)),
                                                               collapse = ", "))
  sigPairsTbl$event_ratio<-sigPairsTbl$`01`+sigPairsTbl$`10`
  sigPairsTbl$event_ratio<-paste0(sigPairsTbl$`11`,"/", sigPairsTbl$event_ratio)
  sigPairsTblSig = sigPairsTbl[order(as.numeric(sigPairsTbl$pValue))][!duplicated(sigPairsTbl$pair)]
  if (nrow(interactions) >= 2) {
    diag(interactions) <- 0
    m <- nrow(interactions)
    n <- ncol(interactions)
    col_pal = RColorBrewer::brewer.pal(9, colPal)
    col_pal = grDevices::colorRampPalette(colors = col_pal)
    col_pal = col_pal(m * n - 1)
    interactions[lower.tri(x = interactions, diag = FALSE)] = NA
    oldpar <- par(no.readonly = TRUE)
    on.exit(par(oldpar))
    par(bty = "n", mar = c(1, 4, 4, 2) + 0.1, las = 2,
        fig = c(0, 1, 0, 1))
    breaks = NA
    if (limitColorBreaks) {
      minLog10pval = 3
      breaks <- seq(-minLog10pval, minLog10pval, length.out = m *
                      n + 1)
      interactions4plot = interactions
      interactions4plot[interactions4plot < (-minLog10pval)] = -minLog10pval
      interactions4plot[interactions4plot > minLog10pval] = minLog10pval
      interactions = interactions4plot
    }
    image(x = 1:n, y = 1:m, interactions, col = col_pal,
          xaxt = "n", yaxt = "n", xlab = "",
          ylab = "", xlim = c(0, n + 1), ylim = c(0,
                                                  n + 1), breaks = seq(-3, 3, length.out = (nrow(interactions) *
                                                                                              ncol(interactions))))
    abline(h = 0:n + 0.5, col = "white", lwd = 0.5)
    abline(v = 0:n + 0.5, col = "white", lwd = 0.5)
    mtext(side = 2, at = 1:m, text = colnames(mutMat),
          cex = fontSize, font = 3)
    mtext(side = 3, at = 1:n, text = colnames(mutMat),
          cex = fontSize, font = 3)
    if (showCounts) {
      countStats = match.arg(arg = countStats, choices = c("all",
                                                           "sig"))
      countType = match.arg(arg = countType, choices = c("all",
                                                         "cooccur", "mutexcl"))
      if (countStats == "sig") {
        w = arrayInd(which(10^-abs(interactions) < max(pvalue)),
                     rep(m, 2))
        for (i in 1:nrow(w)) {
          g1 = rownames(interactions)[w[i, 1]]
          g2 = colnames(interactions)[w[i, 2]]
          g12 = paste(sort(c(g1, g2)), collapse = ", ")
          if (countType == "all") {
            e = sigPairsTblSig[sigPairsTblSig$pValue < max(pvalue)][sigPairsTblSig$pair %in%
                                                       g12, sigPairsTblSig$event_ratio]
          }
          else if (countType == "cooccur") {
            e = sigPairsTblSig[sigPairsTblSig$pValue < max(pvalue)][sigPairsTblSig$Event %in%
                                                       "Co_Occurence"][sigPairsTblSig$pair %in% g12, sigPairsTblSig$`11`]
          }
          else if (countType == "mutexcl") {
            e = sigPairsTblSig[sigPairsTblSig$pValue < max(pvalue)][sigPairsTblSig$Event %in%
                                                       "Mutually_Exclusive"][sigPairsTblSig$pair %in% g12,
                                                                             sigPairsTblSig$`11`]
          }
          if (length(e) == 0) {
            e = 0
          }
          text(w[i, 1], w[i, 2], labels = e, font = 3,
               col = countsFontColor, cex = countsFontSize)
        }
      }
      else if (countStats == "all") {
        w = arrayInd(which(10^-abs(interactions) < max(pvalue)),
                     rep(m, 2))
        w2 = arrayInd(which(10^-abs(interactions) >=
                              max(pvalue)), rep(m, 2))
        w = rbind(w, w2)
        for (i in 1:nrow(w)) {
          g1 = rownames(interactions)[w[i, 1]]
          g2 = colnames(interactions)[w[i, 2]]
          g12 = paste(sort(c(g1, g2)), collapse = ", ")
          if (countType == "all") {
            e = sigPairsTblSig[sigPairsTblSig$pair %in% g12, sigPairsTblSig$event_ratio]
          }
          else if (countType == "cooccur") {
            e = sigPairsTblSig[sigPairsTblSig$pair %in% g12, sigPairsTblSig$`11`]
          }
          else if (countType == "mutexcl") {
            e = sigPairsTblSig[sigPairsTblSig$pair %in% g12, sigPairsTblSig$`01` +
                                 sigPairsTblSig$`10`]
          }
          if (length(e) == 0) {
            e = 0
          }
          text(w[i, 1], w[i, 2], labels = e, font = 3,
               col = countsFontColor, cex = countsFontSize)
        }
      }
    }
    if (showSigSymbols) {
      w = arrayInd(which(10^-abs(interactions) < min(pvalue)),
                   rep(m, 2))
      points(w, pch = pvSymbols[2], col = "black",
             cex = sigSymbolsSize)
      w = arrayInd(which((10^-abs(interactions) < max(pvalue)) &
                           (10^-abs(interactions) > min(pvalue))), rep(m,
                                                                       2))
      points(w, pch = pvSymbols[1], col = "black",
             cex = sigSymbolsSize)
    }
    if (showSigSymbols) {
      points(x = n - nShiftSymbols, y = 0.7 * n, pch = pvSymbols[2],
             cex = sigSymbolsSize)
      text(x = n - nShiftSymbols, y = 0.7 * n, paste0(" P < ",
                                                      min(pvalue)), pos = 4, cex = sigSymbolsFontSize,
           adj = 0)
      points(x = n - nShiftSymbols, y = 0.65 * n, pch = pvSymbols[1],
             cex = sigSymbolsSize)
      text(x = n - nShiftSymbols, y = 0.65 * n, paste0(" P < ",
                                                       max(pvalue)), pos = 4, cex = sigSymbolsFontSize)
    }
    oldpar <- par(no.readonly = TRUE)
    on.exit(par(oldpar))
    par(fig = c(0.4, 0.7, 0, 0.4), new = TRUE)
    image(x = c(0.8, 1), y = seq(0, 1, length.out = 200),
          z = matrix(seq(0, 1, length.out = 200), nrow = 1),
          col = col_pal, xlim = c(0, 1), ylim = c(0, 1), axes = FALSE,
          xlab = NA, ylab = NA)
    atLims = seq(0, 1, length.out = 7)
    axis(side = 4, at = atLims, tcl = -0.15, labels = c("> 3 (Mutually exclusive)",
                                                        2, 1, 0, 1, 2, ">3 (Co-occurence)"), lwd = 0.5,
         cex.axis = sigSymbolsFontSize, line = 0.2)
    text(x = 0.4, y = 0.5, labels = "-log10(P-value)",
         srt = 90, cex = sigSymbolsFontSize, xpd = TRUE)
  }
  if(returnAll){
    return(sigPairsTblSig)}
}




#' @title Drawing Kaplan Meier Survival Curves Using the final survival-related PTMB.
#' @description The function 'plotKMcurves' uses to draw Kaplan-Meier Survival Curves based on PTMB-related riskscore.The riskscore is generated by the signature's PTMB and the coefficient of "Univariate" or "Multivariate" cox regression.
#' @param sig_PTMB The signature's PTMB matrix,which rows are samples and columns are pathways.
#' @param sur A nx2 data frame of samples' survival data,the first line is samples' survival event and the second line is samples' overall survival.
#' @param method Method must be one of "Univariate" and "Multivariate".
#' @param returnAll Logicalvalue.Default is TRUE. If TRUE, return the riskscore and the coefficient of cox regression.
#' @param pval Logical value, a numeric or a string. If logical and TRUE, the p-value is added on the plot. If numeric, than the computet p-value is substituted with the one passed with this parameter. If character, then the customized string appears on the plot.
#' @param color Color to be used for the survival curves.If the number of strata/group (n.strata) = 1, the expected value is the color name. For example color = "blue".If n.strata > 1, the expected value is the grouping variable name. By default, survival curves are colored by strata using the argument color = "strata", but you can also color survival curves by any other grouping variables used to fit the survival curves. In this case, it's possible to specify a custom color palette by using the argument palette.
#' @param palette the color palette to be used. Allowed values include "hue" for the default hue color scale; "grey" for grey color palettes; brewer palettes e.g. "RdBu", "Blues", ...; or custom color palette e.g. c("blue", "red"); and scientific journal palettes from ggsci R package, e.g.: "npg", "aaas", "lancet", "jco", "ucscgb", "uchicago", "simpsons" and "rickandmorty". See details section for more information. Can be also a numeric vector of length(groups); in this case a basic color palette is created using the function palette.
#' @param linetype line types. Allowed values includes i) "strata" for changing linetypes by strata (i.e. groups); ii) a numeric vector (e.g., c(1, 2)) or a character vector c("solid", "dashed").
#' @param conf.int logical value. If TRUE, plots confidence interval.
#' @param pval.method whether to add a text with the test name used for calculating the pvalue, that corresponds to survival curves' comparison - used only when pval=TRUE
#' @param plots logical value.Default is TRUE.If TRUE,plot the Kaplan Meier Survival Curves.
#' @param test.for.trend logical value. Default is FALSE. If TRUE, returns the test for trend p-values. Tests for trend are designed to detect ordered differences in survival curves. That is, for at least one group. The test for trend can be only performed when the number of groups is > 2.
#' @param surv.median.line character vector for drawing a horizontal/vertical line at median survival. Allowed values include one of c("none", "hv", "h", "v"). v: vertical, h:horizontal.
#' @param risk.table Allowed values include:(1)TRUE or FALSE specifying whether to show or not the risk table. Default is FALSE.(2)"absolute" or "percentage". Shows the absolute number and the percentage of subjects at risk by time, respectively.(3)"abs_pct" to show both absolute number and percentage.(4)"nrisk_cumcensor" and "nrisk_cumevents". Show the number at risk and, the cumulative number of censoring and events, respectively.
#' @param cumevents logical value specifying whether to show or not the table of the cumulative number of events. Default is FALSE.
#' @param cumcensor logical value specifying whether to show or not the table of the cumulative number of censoring. Default is FALSE.
#' @param tables.height numeric value (in [0 - 1]) specifying the general height of all tables under the main survival plot.
#' @param add.all a logical value. If TRUE, add the survival curve of pooled patients (null model) onto the main plot.
#' @param ggtheme function, ggplot2 theme name. Default value is theme_survminer. Allowed values include ggplot2 official themes: see theme.
#' @importFrom BiocGenerics sapply
#' @importFrom BiocGenerics lapply
#' @importFrom stats as.formula
#' @importFrom stats median
#' @importFrom survival coxph
#' @importFrom survival Surv
#' @importFrom survival survfit
#' @importFrom BiocGenerics t
#' @importFrom BiocGenerics which
#' @importFrom survminer ggsurvplot
#' @importFrom survminer theme_survminer
#' @export
#' @return Return a list of riskscore and coefficient of cox regression.
#' @examples
#' #get the path of the mutation annotation file and samples' survival data
#' \donttest{maf<-system.file("extdata","data_mutations_extended.txt",package = "pathwayTMB")
#' sur_path<-system.file("extdata","sur.csv",package = "pathwayTMB")
#' sur<-read.csv(sur_path,header=TRUE,row.names = 1)
#' #perform the function 'get_mut_matrix'
#' mut_matrix<-get_mut_matrix(maffile=maf,mut_fre = 0.01,is.TCGA=FALSE,sur=sur)
#' #perform the function `get_PTMB`
#' PTMB_matrix<-get_PTMB(freq_matrix=mut_matrix,genesmbol=genesmbol,gene_path=gene_path)
#' set.seed(1)
#' final_character<-get_final_signature(PTMB=PTMB_matrix,sur=sur)
#' #plot the K-M survival curve
#' plotKMcurves(t(PTMB_matrix[final_character,]),sur=sur,risk.table = TRUE)}
#'
plotKMcurves<-function(sig_PTMB,sur,method="Multivariate",returnAll=TRUE,pval=TRUE,color = NULL,plots=TRUE,
                       palette = NULL,
                       linetype = 1,
                       conf.int = FALSE,
                       pval.method = FALSE,
                       test.for.trend = FALSE,
                       surv.median.line = "none",
                       risk.table = FALSE,
                       cumevents = FALSE,
                       cumcensor = FALSE,
                       tables.height = 0.25,
                       add.all = FALSE,
                       ggtheme = theme_survminer()
){
  #uni_cox
  get_univarCox_result<-function(DE_path_sur){
    covariates<-colnames(DE_path_sur)[1:(length(DE_path_sur[1,])-2)]
    univ_formulas <- sapply(covariates,function(x) as.formula(paste('Surv(survival, event) ~', x)))
    univ_models <- lapply( univ_formulas, function(x){coxph(x, data =DE_path_sur)})

    univ_results <- lapply(univ_models,
                           function(x){
                             x <- summary(x)
                             p.value<-signif(x$wald["pvalue"], digits=2)
                             wald.test<-signif(x$wald["test"], digits=2)
                             beta<-signif(x$coef[1], digits=2);#coeficient beta
                             HR <-signif(x$coef[2], digits=2);#exp(beta)
                             HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2)
                             HR.confint.upper <- signif(x$conf.int[,"upper .95"],2)

                             res<-c(beta, HR, wald.test, p.value)
                             names(res)<-c("beta", "HR", "wald.test",
                                           "p.value")
                             return(res)
                             #return(exp(cbind(coef(x),confint(x))))
                           })
    res <- t(as.data.frame(univ_results, check.names = FALSE))
    result<-as.data.frame(res)
    return(result)
  }
  #multi_cox
  get_multiCox_result<-function(DE_path_sur){
    multi_roc<-coxph(Surv(survival, event) ~.,data = DE_path_sur)
    multi_res<-summary(multi_roc)$coefficients
    return(multi_res)
  }
  inter<-intersect(rownames(sig_PTMB),rownames(sur))
  PTMB_data<-sig_PTMB[inter,]
  sur_data<-sur[inter,]
  final_character<-colnames(PTMB_data)
  final_sur<-as.data.frame(cbind(PTMB_data,event=sur_data[,1],survival=sur_data[,2]))
  rownames(final_sur)<-rownames(PTMB_data)
  DEpathname<-colnames(final_sur)[1:length(final_character)]
  colnames(final_sur)<-c(paste0("a",1:length(final_character)),"event","survival")
  name2name<-cbind(pathname=DEpathname,na=paste0("a",1:length(final_character)))
  rownames(name2name)<-name2name[,2]
  if(method=="Multivariate"){
    uni_res<-get_multiCox_result(final_sur)
  }else if(method=="Univariate"){
    uni_res<-get_univarCox_result(final_sur)
  } else {
    stop("method must be one of 'Univariate'and'Multivariate'.")
  }
  risk_score<-c()
  for(i in 1:length(final_sur[,1])){
    a<-final_sur[i,1:length(final_character)]*as.numeric(uni_res[,1])
    risk_score<-as.data.frame(rbind(risk_score,a))
  }
  risk_score<-rowSums(risk_score)
  names(risk_score)<-rownames(final_sur)
  #plot K-M curves
  inter<-intersect(names(risk_score),rownames(sur_data))
  surdata<-data.frame()
  surdata<-as.data.frame(cbind(sur[inter,],riskscore=risk_score[inter]))
  a<-which(surdata[,dim(surdata)[2]]<median(surdata[,dim(surdata)[2]]))
  surdata[a,dim(surdata)[2]]<-"low_risk"
  surdata[-a,dim(surdata)[2]]<-"high_risk"
  colnames(surdata)<-c("event","survival","riskscore")
  surdata<-as.data.frame(surdata)
  fits<-survfit(Surv(survival, event)~riskscore,data =as.data.frame(surdata))
  if(plots){
   print(ggsurvplot(fits,pval = pval,color = color,
                     palette =palette,
                     linetype = linetype,
                     conf.int = conf.int,
                     pval.method = pval.method ,
                     test.for.trend = test.for.trend ,
                     surv.median.line = surv.median.line,
                     risk.table = risk.table,
                     cumevents = cumevents,
                     cumcensor = cumcensor,
                     tables.height =tables.height,
                     add.all = add.all ,
                     ggtheme =ggtheme,data = surdata))}
  if(returnAll){
    result<-list(res_cox=uni_res,risk_score=risk_score)
    return(result)}
}

#' @title draw an GenePathwayOncoplots
#' @description takes output generated by read.maf and draws an GenePathwayOncoplots.
#' @param maffile an MAF object generated by read.maf.
#' @param gene_path User input pathways geneset list.
#' @param isTCGA Is input MAF file from TCGA source. If TRUE uses only first 12 characters from Tumor_Sample_Barcode.
#' @param top how many top genes to be drawn,genes are arranged from high to low depending on the frequency of mutations. defaults to 20.
#' @param freq_matrix The mutations matrix,generated by `get_mut_matrix`.
#' @param risk_score Samples' PTMB-related risk score,which could be a biomarker for survival analysis and immunotherapy prediction.
#' @param cut_off A threshold value(the median risk score as the default value).Using this value to divide the sample into high and low risk groups with different overall survival.
#' @param clinicalFeatures columns names from 'clinical.data' slot of MAF to be drawn in the plot. Dafault "sample_group".
#' @param annotationColor Custom colors to use for sample annotation-"sample_group". Must be a named list containing a named vector of colors. Default "red" and "green".
#' @param final_character The pathway signature,use to map gene in the GenePathwayOncoplots.
#' @param sortByAnnotation logical sort oncomatrix (samples) by provided 'clinicalFeatures'. Sorts based on first 'clinicalFeatures'. Defaults to TRUE. column-sort.
#' @param removeNonMutated Logical. If TRUE removes samples with no mutations in the GenePathwayOncoplots for better visualization. Default FALSE.
#' @param drawRowBar logical. Plots righ barplot for each gene. Default TRUE.
#' @param drawColBar logical plots top barplot for each sample. Default TRUE.
#' @param leftBarData Data for leftside barplot. Must be a data.frame with two columns containing gene names and values. Default 'NULL'.
#' @param leftBarLims limits for 'leftBarData'. Default 'NULL'.
#' @param rightBarData Data for rightside barplot. Must be a data.frame with two columns containing to gene names and values. Default 'NULL' which draws distibution by variant classification. This option is applicable when only 'drawRowBar' is TRUE.
#' @param rightBarLims limits for 'rightBarData'. Default 'NULL'.
#' @param topBarData Default 'NULL' which draws absolute number of mutation load for each sample. Can be overridden by choosing one clinical indicator(Numeric) or by providing a two column data.frame contaning sample names and values for each sample. This option is applicable when only 'drawColBar' is TRUE.
#' @param logColBar Plot top bar plot on log10 scale. Default FALSE.
#' @param draw_titv logical Includes TiTv plot. Default FALSE
#' @param showTumorSampleBarcodes logical to include sample names.
#' @param fill Logical. If TRUE draws genes and samples as blank grids even when they are not altered.
#' @param showTitle Default TRUE.
#' @param titleText Custom title. Default 'NULL'.
#' @importFrom maftools read.maf
#' @importFrom maftools subsetMaf
#' @importFrom maftools oncoplot
#' @return No return value
#' @export
#' @examples
#' #get the path of the mutation annotation file and samples' survival data
#' maf<-system.file("extdata","data_mutations_extended.txt",package = "pathwayTMB")
#' sur_path<-system.file("extdata","sur.csv",package = "pathwayTMB")
#' sur<-read.csv(sur_path,header=TRUE,row.names = 1)
#' #perform the function 'get_mut_matrix'
#' \donttest{mut_matrix<-get_mut_matrix(maffile=maf,mut_fre = 0.01,is.TCGA=FALSE,sur=sur)
#' #perform the function `get_PTMB`
#' PTMB_matrix<-get_PTMB(freq_matrix=mut_matrix,genesmbol=genesmbol,gene_path=gene_path)
#' set.seed(1)
#' final_character<-get_final_signature(PTMB=PTMB_matrix,sur=sur)}
#' #calculate the risksciore
#' riskscore<-plotKMcurves(t(PTMB_matrix[final_character,]),sur=sur,plots=FALSE)$risk_score
#' cut<-median(riskscore)
#' \donttest{GenePathwayOncoplots(maf,gene_path,mut_matrix,riskscore,cut,final_character)}
#'
GenePathwayOncoplots<-function(maffile,gene_path,freq_matrix,risk_score,cut_off,final_character,isTCGA=FALSE,top=20,clinicalFeatures = "sample_group",annotationColor=c("red","green"),sortByAnnotation = TRUE,removeNonMutated= FALSE,drawRowBar= TRUE,drawColBar= TRUE,leftBarData= NULL,leftBarLims= NULL,rightBarData= NULL,rightBarLims= NULL,topBarData= NULL,logColBar= FALSE,draw_titv= FALSE,showTumorSampleBarcodes= FALSE,fill= TRUE,showTitle = TRUE,
                               titleText = NULL){
  maf<-read.maf(maffile,isTCGA = isTCGA)
  all_sample<-as.data.frame(maf@clinical.data)
  rownames(all_sample)<-all_sample[,1]
  if(isTCGA){
    samples<-substr(gsub(pattern = "\\.",replacement = "\\-",names(risk_score)),1,12)
  }else{
    samples<-names(risk_score)
  }
  b<-c()
  for(i in 1:length(samples)){
    a<-which(grepl(pattern = samples[i],x =all_sample[,1],ignore.case = T))[1]
    b<-c(b,a)
  }
  clinical<-as.data.frame(cbind(all_sample[b,],sample_group=risk_score))
  clinical[,2]<-as.numeric(clinical[,2])
  a<-which(clinical$sample_group<cut_off)
  clinical[a,2]<-"low_risk"
  clinical[-a,2]<-"high_risk"
  colnames(clinical)<-c("Tumor_Sample_Barcode","sample_group")
  maf<-read.maf(maffile,isTCGA = isTCGA,clinicalData = clinical)
  maf<-subsetMaf(maf,tsb = clinical[,1])
  col<-annotationColor
  names(col)<-c("high_risk","low_risk")
  fabcolors<-list(sample_group=col)
  #gene
  b<-gene_path[final_character]
  c<-c()
  for(i in 1:length(b)){
    d<-b[[i]]
    c<-c(c,d)
  }
  genes<-unique(c)
  inter_gene<-intersect(rownames(freq_matrix),genes)
  gene_fre<-apply(freq_matrix,1,function(x){length(which(x!=0))/length(x)})
  inter_gene_fre<-sort(gene_fre[inter_gene],decreasing = T)
  genes<-names(inter_gene_fre)[1:top]
  Genes<-c()
  Pathway<-c()
  for(i in 1:length(final_character)){
    b<-intersect(genes,gene_path[[final_character[i]]])
    c<-rep(final_character[i],length(b))
    Genes<-c(Genes,b)
    Pathway<-c(Pathway,c)
  }
  pathway<-data.frame(Genes=Genes,Pathway=Pathway)
  oncoplot(maf,clinicalFeatures = "sample_group",sortByAnnotation = sortByAnnotation,annotationColor = fabcolors,pathways = pathway,removeNonMutated = removeNonMutated,
           drawRowBar= drawRowBar,drawColBar= drawColBar,leftBarData= leftBarData,leftBarLims= leftBarLims,rightBarData= rightBarData,rightBarLims= rightBarLims,topBarData= topBarData,logColBar= logColBar,draw_titv= draw_titv,
           showTumorSampleBarcodes= showTumorSampleBarcodes,fill= fill,showTitle = showTitle,titleText = titleText)
}


#' @title plot the ROC curve
#' @description This function uses to plot a ROC curve.
#' @param response a factor, numeric or character vector of responses (true class), typically encoded with 0 (controls) and 1 (cases). Only two classes can be used in a ROC curve.
#' @param riskscore a numeric vector of the same length than response, containing the predicted value of each observation.
#' @param main the title of the ROC curve
#' @param add  if TRUE, the ROC curve will be added to an existing plot. If FALSE (default), a new plot will be created.
#' @param col the color of the ROC curve
#' @param legacy.axes a logical indicating if the specificity axis (x axis) must be plotted as as decreasing “specificity” (FALSE) or increasing “1 - specificity” (TRUE, the default) as in most legacy software. This affects only the axis, not the plot coordinates.
#' @param print.auc boolean. Should the numeric value of AUC be printed on the plot?
#' @param grid boolean or numeric vector of length 1 or 2. Should a background grid be added to the plot? Numeric: show a grid with the specified interval between each line; Logical: show the grid or not. Length 1: same values are taken for horizontal and vertical lines. Length 2: grid value for vertical (grid[1]) and horizontal (grid[2]). Note that these values are used to compute grid.v and grid.h. Therefore if you specify a grid.h and grid.v, it will be ignored.
#' @param auc.polygon boolean. Whether or not to display the area as a polygon.
#' @param auc.polygon.col color (col) for the AUC polygon.
#' @param max.auc.polygon boolean. Whether or not to display the maximal possible area as a polygon.
#' @param max.auc.polygon.col color (col) for the maximum AUC polygon.
#' @importFrom pROC roc
#' @importFrom pROC plot.roc
#' @return No return value
#' @export
#' @examples
#' #get the path of the mutation annotation file and samples' survival data
#' maf<-system.file("extdata","data_mutations_extended.txt",package = "pathwayTMB")
#' sur_path<-system.file("extdata","sur.csv",package = "pathwayTMB")
#' sur<-read.csv(sur_path,header=TRUE,row.names = 1)
#' #perform the function 'get_mut_matrix'
#' \donttest{mut_matrix<-get_mut_matrix(maffile=maf,mut_fre = 0.01,is.TCGA=FALSE,sur=sur)
#' #perform the function `get_PTMB`
#' PTMB_matrix<-get_PTMB(freq_matrix=mut_matrix,genesmbol=genesmbol,gene_path=gene_path)
#' set.seed(1)
#' final_character<-get_final_signature(PTMB=PTMB_matrix,sur=sur)}
#' #calculate the risksciore
#' riskscore<-plotKMcurves(t(PTMB_matrix[final_character,]),sur=sur,plots=FALSE)$risk_score
#' #get the path of samples' immunotherapy response data
#' res_path<- system.file("extdata","response.csv",package = "pathwayTMB")
#' response<-read.csv(res_path,header=TRUE,stringsAsFactors =FALSE,row.name=1)
#' plotROC(riskscore=riskscore,response=response,main="Objective Response",print.auc=TRUE)
#'
plotROC<-function(riskscore,response,main,add=FALSE,col=par("col"),legacy.axes=TRUE,print.auc=FALSE,grid=FALSE,auc.polygon=FALSE,auc.polygon.col="skyblue",max.auc.polygon=FALSE,max.auc.polygon.col="#EEEEEE"){
  inter<-intersect(names(riskscore),rownames(response))
  if(length(inter)==0){
    stop("Please input the same sample's data!")
  }
  riskscore<-riskscore[inter]
  response<-response[inter,]
  roc_data<-as.data.frame(cbind(riskscore=riskscore,response=response))
  roc1<-roc(roc_data$response,roc_data$riskscore)
  plot.roc(roc1,add = add,col = col,legacy.axes=legacy.axes,print.auc = print.auc,main=main,grid = grid,auc.polygon=auc.polygon,auc.polygon.col=auc.polygon.col,max.auc.polygon=max.auc.polygon,max.auc.polygon.col=max.auc.polygon.col)
}

Try the pathwayTMB package in your browser

Any scripts or data that you put into this service are public.

pathwayTMB documentation built on Aug. 9, 2022, 5:09 p.m.