R/UniD.pred.R

Defines functions UniD.pred

Documented in UniD.pred

#' Diagnostic Prediction
#'
#' @description Used to predict other genomic information, including:
#' (1) chromosome 1p/19q co-deletion status; (2) IDH mutation status; (3)
#' ATRX mutation status; (4) TERT promoter mutation status; (5) TCGA gene
#' expression subtype; (6) MGMT expression level by tumor. When applying
#' the prediction models, missing values may exist in the input data. The
#' number of missing values will be checked. If high proportion of input
#' data are missing values, the predicted results may have quality issues.
#'
#' @param inputdata input data with columns as samples and rows as
#' probes, could be either Beta.raw generated from \code{UniD.dataqc} or
#' Beta.clean generated from \code{UniD.probefilter}.
#' @param inputvalueType indicate the inputdata value type. Can be "B"
#' or "M". By default is Beta value "B".
#' @param inputdata.BMIQ input data after BMIQ normalized with columns
#' as samples and rows as probes. Must be Beta value. Suggest to be the
#' Beta.BMIQ generated by \code{UniD.BMIQ}.
#' @param Pred.1p19q whether perform chromosome 1p/19q co-deletion
#' status prediction. Default is TRUE.
#' @param Pred.IDH whether perform IDH mutation status prediction,
#' default is TRUE.
#' @param Pred.ATRX whehter perform ATRX mutation status prediction,
#' default is TRUE.
#' @param Pred.TERTp whehter perform TERT promoter mutation status
#' prediction, default is TRUE.
#' @param Pred.ExpressSubtype whether perform TCGA gene expression
#' subtype prediction, including Classical/C, Mesenchymal/M, and Proneural/P.
#' Default is TRUE.
#' @param outDir The directory where the prediction result saved
#' @param write wehther save the data generated within the function
#' @return A data frame with predicted results, rows are samples, columns
#' are each predicted genomic information
#' @examples
#' \dontrun{
#' Pred <- UniD.pred(inputdata = Beta.raw, inputvalueType = "B", inputdata.BMIQ
#' = Beta.BMIQ, Pred.1p19q = T, Pred.IDH = T, Pred.ATRX = T, Pred.TERTp = T,
#' Pred.ExpressSubtype = T, Pred.MGMTExpress = T, outDir = "~/Desktop/output/",
#' write = T)
#' }
#' @import impute
#' @importFrom plyr revalue
#' @importFrom glmnet predict.glmnet
#' @importFrom  mlr makeLearner predictLearner
#' @import mlr
#' @import methods

#' @export
UniD.pred <- function(inputdata,
                      inputvalueType = "B",
                      inputdata.BMIQ,
                      Pred.1p19q = TRUE,
                      Pred.IDH = TRUE,
                      Pred.ATRX = TRUE,
                      Pred.TERTp = TRUE,
                      Pred.ExpressSubtype = TRUE,
                      outDir,
                      write)
{
  message("")
  message("====Biomarker Prediction Start====")

  message("#Note: inputdata and inputdata.BMIQ must with columns as samples
          and rows as probes")
  message("#Note: inputdata.BMIQ must be Beta value format")
  message("")

  Pred <- data.frame(sample = colnames(inputdata))
  if(Pred.1p19q){
    message("##Predict Chromosome 1p19q co-deletion Start##")
    inputdata.1p19q <- TrimData(probeNames = codel_probeNames,
                                inputdata = inputdata)
    inputdata.1p19q <- ChangeDataType(inputvalueType, TargetValueType = "M",
                                      data = inputdata.1p19q)
    inputdata.1p19q.NA <- apply(inputdata.1p19q, 2, function(x) sum(is.na(x)))/
      length(codel_probeNames)
    inputdata.1p19q.impute <- imputeData(inputdata.1p19q)

    Pred_1p19q <- predict(codel_model, newx = t(inputdata.1p19q.impute),
                           s = 0.1,type = "class")
    Pred_1p19q <- data.frame(pred.1p19q.codel = Pred_1p19q[,1],
                             missing.probe.1p19q = inputdata.1p19q.NA)
    Pred <- cbind(Pred, Pred_1p19q)
    message("#Predict Chromosome 1p19q co-deletion Finish##")
    message("")
  }

  if(Pred.IDH){
    message("##Predict IDH mutation status Start##")
    inputdata.IDH <- TrimData(probeNames = IDH_probeNames,
                              inputdata = inputdata)
    inputdata.IDH <- ChangeDataType(inputvalueType, TargetValueType = "M",
                                    data = inputdata.IDH)
    inputdata.IDH.NA <- apply(inputdata.IDH, 2, function(x) sum(is.na(x)))/
      length(IDH_probeNames)
    inputdata.IDH.impute <- imputeData(inputdata.IDH)

    Pred_IDH <- predict(IDH_model, newx = t(inputdata.IDH.impute),
                        type = "class", s = 0.1)
    Pred_IDH <- data.frame(pred.IDH = Pred_IDH[,1], missing.probe.IDH
                           = inputdata.IDH.NA)
    
    
    Pred <- cbind(Pred, Pred_IDH)
    message("##Predict IDH mutation status Finish##")
    message("")
  }

  if(Pred.ATRX){
    message("##Predict ATRX mutation status Start##")
    inputdata.ATRX <- TrimData(probeNames = ATRX_probeNames, inputdata
                               = inputdata)
    inputdata.ATRX <- ChangeDataType(inputvalueType, TargetValueType = "M",
                                     data = inputdata.ATRX)
    inputdata.ATRX.NA <- apply(inputdata.ATRX, 2, function(x) sum(is.na(x)))/
      length(ATRX_probeNames)
    inputdata.ATRX.impute <- imputeData(inputdata.ATRX)

    Pred_ATRX <- predict(ATRX_model, newx = t(inputdata.ATRX.impute),
                         type = "class", s = 0.1)
    Pred_ATRX <- data.frame(pred.ATRX = Pred_ATRX[,1], missing.probe.ATRX
                            = inputdata.ATRX.NA)
    Pred <- cbind(Pred, Pred_ATRX)
    message("##Predict ATRX mutation status Finish##")
    message("")
  }

  if(Pred.TERTp){
    message("##Predict TERT promoter mutation status Start##")
    inputdata.TERTp <- TrimData(probeNames = TERT_probeNames,  inputdata
                                = inputdata)
    inputdata.TERTp <- ChangeDataType(inputvalueType, TargetValueType = "M",
                                      data = inputdata.TERTp)
    inputdata.TERTp.NA <- apply(inputdata.TERTp, 2, function(x) sum(is.na(x)))/
      length(TERT_probeNames)
    inputdata.TERTp.impute <- imputeData(inputdata.TERTp)

    Pred_TERTp <- predict(TERT_model, newx = t(inputdata.TERTp.impute),
                          type = "class", s = 0.1)
    Pred_TERTp <- data.frame(pred.TERTp = Pred_TERTp[,1], missing.probe.TERTp
                             = inputdata.TERTp.NA)
    Pred <- cbind(Pred, Pred_TERTp)
    message("##Predict TERT promoter mutation status Finish##")
    message("")
  }

  if(Pred.ExpressSubtype){
    message("##Predict TCGA Gene Expression subtype Start##")
    #trim data
    inputdata.subtype <- TrimData(probeNames = subtype_probeNames, inputdata
                                  = inputdata.BMIQ)
    inputdata.subtype <- ChangeDataType(inputvalueType, TargetValueType = "M",
                                        data = inputdata.subtype)
    inputdata.subtype.NA <- apply(inputdata.subtype, 2, function(x)
      sum(is.na(x)))/length(subtype_probeNames)
    inputdata.subtype.impute <- imputeData(inputdata.subtype)

    #use models, because involved 100 models
    pred.subtype.list <- list()
    classif.lrn= makeLearner("classif.randomForest", predict.type = "prob")
    for(i in 1:100){
      pred.subtype.list[[i]] <- mlr::predictLearner(classif.lrn,subtype_model[[i]],
                                        data.frame(t(inputdata.subtype.impute)))
    }

    pred.subtype.final <- matrix(ncol = 4, nrow = ncol(inputdata.subtype.impute))
    colnames(pred.subtype.final) <- c("Pred.subtype","C_prob","M_prob","P_prob")
    rownames(pred.subtype.final) <- colnames(inputdata.subtype.impute)

    for(i in 1:ncol(inputdata.subtype.impute)){
      print(i)
      summary <- do.call(rbind,lapply(pred.subtype.list, function(x) x[i,]))
      pred.subtype.final[i,2:4] <- apply(summary[,1:3], 2, function(x) mean(x))
      pred.subtype.final[i,1] <- names(which.max(table(max.col(summary))))
    }

    Pred_subtype <- as.data.frame(pred.subtype.final)
    Pred_subtype$Pred.subtype <- revalue(Pred_subtype$Pred.subtype,
                                         c("1" = "Classical",
                                           "2" = "Mesenchymal",
                                           "3" = "Proneural"))
    missing.probe.subtype <- inputdata.subtype.NA
    Pred_subtype <- cbind(Pred_subtype, missing.probe.subtype)
    Pred <- cbind(Pred, Pred_subtype)
    message("##Predict TCGA Gene Expression subtype Finish##")
    message("")
  }

  #if(Pred.MGMTExpress){} # not ready yet

  if(write){
    write.table(Pred, file = paste0(outDir, "/UniD_Biomarker.Pred.csv"),
                sep = ",", quote = F, col.names = NA)
    message(paste0("Predicted result was saved as: ", outDir, "/UniD_Biomarker.Pred.csv"))
  }

  message("====Biomarker Prediction Finish====")
  message("")
  return(Pred)
}
JieYang031/UniD documentation built on May 5, 2021, 5:16 p.m.