#' Diagnostic Prediction
#'
#' @description Used to predict other genomic information, including:
#' (1) chromosome 1p/19q co-deletion status; (2) IDH mutation status; (3)
#' ATRX mutation status; (4) TERT promoter mutation status; (5) TCGA gene
#' expression subtype; (6) MGMT expression level by tumor. When applying
#' the prediction models, missing values may exist in the input data. The
#' number of missing values will be checked. If high proportion of input
#' data are missing values, the predicted results may have quality issues.
#'
#' @param inputdata input data with columns as samples and rows as
#' probes, could be either Beta.raw generated from \code{UniD.dataqc} or
#' Beta.clean generated from \code{UniD.probefilter}.
#' @param inputvalueType indicate the inputdata value type. Can be "B"
#' or "M". By default is Beta value "B".
#' @param inputdata.BMIQ input data after BMIQ normalized with columns
#' as samples and rows as probes. Must be Beta value. Suggest to be the
#' Beta.BMIQ generated by \code{UniD.BMIQ}.
#' @param Pred.1p19q whether perform chromosome 1p/19q co-deletion
#' status prediction. Default is TRUE.
#' @param Pred.IDH whether perform IDH mutation status prediction,
#' default is TRUE.
#' @param Pred.ATRX whehter perform ATRX mutation status prediction,
#' default is TRUE.
#' @param Pred.TERTp whehter perform TERT promoter mutation status
#' prediction, default is TRUE.
#' @param Pred.ExpressSubtype whether perform TCGA gene expression
#' subtype prediction, including Classical/C, Mesenchymal/M, and Proneural/P.
#' Default is TRUE.
#' @param outDir The directory where the prediction result saved
#' @param write wehther save the data generated within the function
#' @return A data frame with predicted results, rows are samples, columns
#' are each predicted genomic information
#' @examples
#' \dontrun{
#' Pred <- UniD.pred(inputdata = Beta.raw, inputvalueType = "B", inputdata.BMIQ
#' = Beta.BMIQ, Pred.1p19q = T, Pred.IDH = T, Pred.ATRX = T, Pred.TERTp = T,
#' Pred.ExpressSubtype = T, Pred.MGMTExpress = T, outDir = "~/Desktop/output/",
#' write = T)
#' }
#' @import impute
#' @importFrom plyr revalue
#' @importFrom glmnet predict.glmnet
#' @importFrom mlr makeLearner predictLearner
#' @import mlr
#' @import methods
#' @export
UniD.pred <- function(inputdata,
inputvalueType = "B",
inputdata.BMIQ,
Pred.1p19q = TRUE,
Pred.IDH = TRUE,
Pred.ATRX = TRUE,
Pred.TERTp = TRUE,
Pred.ExpressSubtype = TRUE,
outDir,
write)
{
message("")
message("====Biomarker Prediction Start====")
message("#Note: inputdata and inputdata.BMIQ must with columns as samples
and rows as probes")
message("#Note: inputdata.BMIQ must be Beta value format")
message("")
Pred <- data.frame(sample = colnames(inputdata))
if(Pred.1p19q){
message("##Predict Chromosome 1p19q co-deletion Start##")
inputdata.1p19q <- TrimData(probeNames = codel_probeNames,
inputdata = inputdata)
inputdata.1p19q <- ChangeDataType(inputvalueType, TargetValueType = "M",
data = inputdata.1p19q)
inputdata.1p19q.NA <- apply(inputdata.1p19q, 2, function(x) sum(is.na(x)))/
length(codel_probeNames)
inputdata.1p19q.impute <- imputeData(inputdata.1p19q)
Pred_1p19q <- predict(codel_model, newx = t(inputdata.1p19q.impute),
s = 0.1,type = "class")
Pred_1p19q <- data.frame(pred.1p19q.codel = Pred_1p19q[,1],
missing.probe.1p19q = inputdata.1p19q.NA)
Pred <- cbind(Pred, Pred_1p19q)
message("#Predict Chromosome 1p19q co-deletion Finish##")
message("")
}
if(Pred.IDH){
message("##Predict IDH mutation status Start##")
inputdata.IDH <- TrimData(probeNames = IDH_probeNames,
inputdata = inputdata)
inputdata.IDH <- ChangeDataType(inputvalueType, TargetValueType = "M",
data = inputdata.IDH)
inputdata.IDH.NA <- apply(inputdata.IDH, 2, function(x) sum(is.na(x)))/
length(IDH_probeNames)
inputdata.IDH.impute <- imputeData(inputdata.IDH)
Pred_IDH <- predict(IDH_model, newx = t(inputdata.IDH.impute),
type = "class", s = 0.1)
Pred_IDH <- data.frame(pred.IDH = Pred_IDH[,1], missing.probe.IDH
= inputdata.IDH.NA)
Pred <- cbind(Pred, Pred_IDH)
message("##Predict IDH mutation status Finish##")
message("")
}
if(Pred.ATRX){
message("##Predict ATRX mutation status Start##")
inputdata.ATRX <- TrimData(probeNames = ATRX_probeNames, inputdata
= inputdata)
inputdata.ATRX <- ChangeDataType(inputvalueType, TargetValueType = "M",
data = inputdata.ATRX)
inputdata.ATRX.NA <- apply(inputdata.ATRX, 2, function(x) sum(is.na(x)))/
length(ATRX_probeNames)
inputdata.ATRX.impute <- imputeData(inputdata.ATRX)
Pred_ATRX <- predict(ATRX_model, newx = t(inputdata.ATRX.impute),
type = "class", s = 0.1)
Pred_ATRX <- data.frame(pred.ATRX = Pred_ATRX[,1], missing.probe.ATRX
= inputdata.ATRX.NA)
Pred <- cbind(Pred, Pred_ATRX)
message("##Predict ATRX mutation status Finish##")
message("")
}
if(Pred.TERTp){
message("##Predict TERT promoter mutation status Start##")
inputdata.TERTp <- TrimData(probeNames = TERT_probeNames, inputdata
= inputdata)
inputdata.TERTp <- ChangeDataType(inputvalueType, TargetValueType = "M",
data = inputdata.TERTp)
inputdata.TERTp.NA <- apply(inputdata.TERTp, 2, function(x) sum(is.na(x)))/
length(TERT_probeNames)
inputdata.TERTp.impute <- imputeData(inputdata.TERTp)
Pred_TERTp <- predict(TERT_model, newx = t(inputdata.TERTp.impute),
type = "class", s = 0.1)
Pred_TERTp <- data.frame(pred.TERTp = Pred_TERTp[,1], missing.probe.TERTp
= inputdata.TERTp.NA)
Pred <- cbind(Pred, Pred_TERTp)
message("##Predict TERT promoter mutation status Finish##")
message("")
}
if(Pred.ExpressSubtype){
message("##Predict TCGA Gene Expression subtype Start##")
#trim data
inputdata.subtype <- TrimData(probeNames = subtype_probeNames, inputdata
= inputdata.BMIQ)
inputdata.subtype <- ChangeDataType(inputvalueType, TargetValueType = "M",
data = inputdata.subtype)
inputdata.subtype.NA <- apply(inputdata.subtype, 2, function(x)
sum(is.na(x)))/length(subtype_probeNames)
inputdata.subtype.impute <- imputeData(inputdata.subtype)
#use models, because involved 100 models
pred.subtype.list <- list()
classif.lrn= makeLearner("classif.randomForest", predict.type = "prob")
for(i in 1:100){
pred.subtype.list[[i]] <- mlr::predictLearner(classif.lrn,subtype_model[[i]],
data.frame(t(inputdata.subtype.impute)))
}
pred.subtype.final <- matrix(ncol = 4, nrow = ncol(inputdata.subtype.impute))
colnames(pred.subtype.final) <- c("Pred.subtype","C_prob","M_prob","P_prob")
rownames(pred.subtype.final) <- colnames(inputdata.subtype.impute)
for(i in 1:ncol(inputdata.subtype.impute)){
print(i)
summary <- do.call(rbind,lapply(pred.subtype.list, function(x) x[i,]))
pred.subtype.final[i,2:4] <- apply(summary[,1:3], 2, function(x) mean(x))
pred.subtype.final[i,1] <- names(which.max(table(max.col(summary))))
}
Pred_subtype <- as.data.frame(pred.subtype.final)
Pred_subtype$Pred.subtype <- revalue(Pred_subtype$Pred.subtype,
c("1" = "Classical",
"2" = "Mesenchymal",
"3" = "Proneural"))
missing.probe.subtype <- inputdata.subtype.NA
Pred_subtype <- cbind(Pred_subtype, missing.probe.subtype)
Pred <- cbind(Pred, Pred_subtype)
message("##Predict TCGA Gene Expression subtype Finish##")
message("")
}
#if(Pred.MGMTExpress){} # not ready yet
if(write){
write.table(Pred, file = paste0(outDir, "/UniD_Biomarker.Pred.csv"),
sep = ",", quote = F, col.names = NA)
message(paste0("Predicted result was saved as: ", outDir, "/UniD_Biomarker.Pred.csv"))
}
message("====Biomarker Prediction Finish====")
message("")
return(Pred)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.