Nothing
#' Cross Validations for Lasso Elastic Net Survival predictive models and
#' Classification
#'
#' The function does cross validation for Lasso, Elastic net and Ridge
#' regressions models before the survial analysis and classification.
#' The survival analysis is based on the selected metabolites in the
#' presence or absene of prognostic factors.
#'
#' The function performs the cross validations for Lasso, Elastic net and
#' Ridge regressions models for Cox proportional hazard model. Metabolites
#' are selected at each iteration and then use for the classifier.
#' This implies that predictive metabolites signature is varied from one
#' cross validation to the other depending on selection.
#' The underline idea is to investigate the Hazard Ratio for the train and
#' test data based on the optimal lambda selected for the
#' non-zero shrinkage coefficients, the nonzero selected metabolites will
#' thus be used in the survival analysis and in calculation of the risk
#' scores for each sets of data.
#'
#' @param Survival A vector of survival time with length equals to number of
#' subjects
#' @param Censor A vector of censoring indicator
#' @param Mdata A large or small metabolic profile matrix. A matrix with metabolic profiles where the number of rows should be equal to the number of metabolites and number of columns should be equal to number of patients.
#' @param Prognostic A dataframe containing possible prognostic(s) factor and/or treatment effect to be used in the model.
#' @param Quantile The cut off value for the classifier, default is the median cutoff
#' @param Metlist A list of metabolites to be considered in the model
#' usually smaller than the metabolites in the Mdata .
#' Default is to use all metabolites available and it is advisable
#' to be greater than 17.
#' @param Standardize A Logical flag for the standardization of the
#' metabolite matrix, prior to fitting the model sequence.
#' The coefficients are always returned on the original scale.
#' Default is standardize=TRUE.
#' @param Reduce A boolean parameter indicating if the metabolic profile
#' matrix should be reduced, default is TRUE and larger metabolic profile
#' matrix is reduced by supervised pca approach and first pca is extracted
#' from the reduced matrix to be used in the classifier.
#' @param Select Number of metabolites (default is 15) to be selected
#' from supervised PCA. This is valid only if the argument Reduce=TRUE
#' @param Alpha The mixing parameter for glmnet
#' (see \code{\link[glmnet]{glmnet}}). The range is 0<= Alpha <= 1.
#' The Default is 1
#' @param Fold number of folds to be used for the cross validation.
#' Its value ranges between 3 and the numbe rof subjects in the dataset
#' @param Ncv Number of validations to be carried out. The default is 25.
#' @param nlambda The number of lambda values - default is 100 as in glmnet.
#' @return A object of class \code{\link[MetabolicSurv]{cvle}} is
#' returned with the following values \itemize{
#' \item{Coef.mat}{ :A matrix of coefficients with rows equals to number
#' of cross validations and columns equals to number of metabolites.}
#' \item{Runtime}{A vector of runtime for each iteration measured in
#' seconds.}
#' \item{lambda}{A vector of estimated optimum lambda for each iterations.}
#' \item{n}{A vector of the number of selected metabolites}
#' \item{HRTrain}{A matrix of survival information for the training
#' dataset. It has three columns representing the estimated HR, the
#' 95% lower confidence interval and the 95% upper confidence interval.}
#' \item{HRTest}{A matrix of survival information for the test dataset.
#' It has three columns representing the estimated HR, the 95%
#' lower confidence interval and the 95% upper confidence interval.}
#' \item{pld}{A vector of partial likelihood deviance at each cross
#' validations.}
#' \item{Met.mat}{A matrix with 0 and 1. Number of rows equals to
#' number of iterations and number of columns equals to number of
#' metabolites. 1 indicates that the particular metabolite was
#' selected or had nonzero coefficient and otherwise it is zero.}
#' \item{Mdata}{The Metabolite data matrix that was used for the
#' analysis either same as Mdata or a reduced version.}
#' }
#' @author Olajumoke Evangelina Owokotomo, \email{olajumoke.owokotomo@@uhasselt.be}
#' @author Ziv Shkedy
#' @seealso \code{\link[survival]{coxph}},
#' \code{\link[MetabolicSurv]{EstimateHR}}, \code{\link[glmnet]{glmnet}},
#' \code{\link[MetabolicSurv]{Lasoelacox}}
#' @examples
#' \donttest{
#' ## FIRSTLY SIMULATING A METABOLIC SURVIVAL DATA
#' Data = MSData(nPatients = 100, nMet = 150, Prop = 0.5)
#'
#' ## USING THE FUNCTION
#' Results = CVLasoelacox(Survival = Data$Survival,Censor = Data$Censor,
#' Mdata = t(Data$Mdata),Prognostic = Data$Prognostic, Quantile = 0.5,
#' Metlist = NULL,Standardize = TRUE, Reduce=FALSE, Select=15,
#' Alpha = 1,Fold = 4,Ncv = 10,nlambda = 100)
#'
#' ## NUMBER OF SELECTED METABOLITES PER CV
#' Results@n
#'
#' ## GET THE MATRIX OF COEFFICIENTS
#' Results@Coef.mat
#'
#' ## SURVIVAL INFORMATION OF THE TRAIN DATASET
#' Results@HRTrain
#'
#' ## SURVIVAL INFORMATION OF THE TEST DATASET
#' Results@HRTest
#' }
#' @export CVLasoelacox
#' @import superpc
#' @import stats
#' @import methods
#' @import grDevices
#' @import graphics
CVLasoelacox <- function (Survival,
Censor,
Mdata,
Prognostic,
Quantile = 0.5,
Metlist = NULL,
Standardize = TRUE,
Reduce=TRUE,
Select=15,
Alpha = 1,
Fold = 4,
Ncv = 10,
nlambda = 100)
{
if (missing(Survival)) stop("Argument 'Survival' is missing...")
if (missing(Mdata)) stop("Argument 'Mdata' is missing...")
if (missing(Censor)) stop("Argument 'Censor' is missing...")
if(is.null(Metlist)){
Mdata <- Mdata
} else {
Mdata <- Mdata[is.element(rownames(Mdata),Metlist),]
}
if (Reduce) {
Dat <- Mdata
DataForReduction<-list(x=Dat,y=Survival, censoring.status=Censor, metabolitenames=rownames(Dat))
TentativeList<-names(sort(abs(superpc::superpc.train(DataForReduction, type="survival")$feature.scores),decreasing =TRUE))[1:Select]
Mdata<-Dat[TentativeList,]
} else {
Mdata<-Mdata
}
metnames <- rownames(Mdata)
n.mets<-nrow(Mdata)
n.patients<-ncol(Mdata)
Run.Time<-rep(NA, Ncv)
n.train<-(n.patients-floor(n.patients/Fold))
n.test<-floor(n.patients/Fold)
cv.train <-matrix(0,Ncv,n.train)
cv.test <-matrix(0,Ncv,n.test)
n.g<-rep(0, Ncv)
#optimum lambda
lambda<-rep(NA, Ncv)
pld<-rep(NA, Ncv)
#HR--------
HRTrain<-matrix(NA,nrow=Ncv,ncol=3)
HRTest<-matrix(NA,nrow=Ncv,ncol=3)
if(is.null(Prognostic)){
Data <- Mdata
Data.Full <- t(Mdata)
Penalty <- rep(1,nrow(Data.Full))
} else{
Data <- t(Mdata)
Data.Full <- cbind(Data,Prognostic)
Penalty <- c(rep(1,ncol(Data)),rep(0,ncol(Prognostic)))
}
# Survival times must be larger than 0
Survival[Survival <= 0] <- stats::quantile(Survival, probs = 0.01)
perPrognostic<-NULL
coef.mat<-met.mat<-matrix(0,nrow=Ncv,ncol=nrow(Mdata))
pIndex <- c(1:n.patients)
for (i in 1:Ncv){
message('Cross validation loop ',i)
cv.train[i,] <-sort(sample(pIndex,n.train,replace=F) )
cv.test[i,] <-c(1:n.patients)[-c(intersect(cv.train[i,] ,c(1:n.patients)))]
Stime=Survival[cv.train[i,]]
sen= Censor[cv.train[i,]]
Data.Full2 <-Data.Full[cv.train[i,],]
Run.Time[i] <-system.time(Lasso.Cox.CV <- glmnet::cv.glmnet(x = as.matrix(Data.Full2),y = survival::Surv(as.vector(Stime),as.vector(sen) == 1),
family = 'cox',
alpha = Alpha,
nfolds= Fold,
nlambda = nlambda,
penalty.factor = Penalty,
standardize = Standardize))[3]
# Results of the cv.glmnet procedure
Lambda <- Lasso.Cox.CV$lambda.min
Alllamda <- Lasso.Cox.CV$lambda
Coefficients <- stats::coef(Lasso.Cox.CV, s =Lambda)
Coefficients.NonZero <- Coefficients[Coefficients[, 1] != 0, ]
pld[i]<-Lasso.Cox.CV$cvm[Lasso.Cox.CV$lambda==Lasso.Cox.CV$lambda.min] # partial likelihood deviance
if (!is.null(dim(Coefficients.NonZero)))
{
Selected.mets <- setdiff(colnames(Data.Full2),colnames(Prognostic))
Coefficients.NonZero <- Coefficients[c(Selected.mets,colnames(Prognostic)),]
Select <- "F"
}else{
Select <- "T"
}
if (is.null(Prognostic))
{
Selected.mets <- names(Coefficients.NonZero)
}else{
Selected.mets <- setdiff(names(Coefficients.NonZero), colnames(Prognostic))
}
n.g[i] <- length(Selected.mets)
# What to do if no metabolite is selected?
# Decrease lambda until a metabolite is selected
# Going through the list of lambda values and repeat the above procedure
Lambda <- Lasso.Cox.CV$lambda.min
Lambda.Sequence <- Lasso.Cox.CV$lambda
Lambda.Index <- which(Lambda.Sequence == Lasso.Cox.CV$lambda.min)
Lambda.Index.Add = 0
while (n.g[i] <= 0) {
Lambda.Index.Add <- Lambda.Index.Add + 1
Coefficients <- stats::coef(Lasso.Cox.CV, s = 0.12)
Coefficients.NonZero <- Coefficients[Coefficients[, 1] != 0,]
if (!is.null(dim(Coefficients.NonZero)))
{
Selected.mets <- setdiff(colnames(Data.Full),colnames(Prognostic))
Coefficients.NonZero <- Coefficients[c(Selected.mets,colnames(Prognostic)),]
Select <- "F"
}else{
Select <- "T"
}
if (is.null(Prognostic))
{
Selected.mets <- names(Coefficients.NonZero)
}
else
{
Selected.mets <- setdiff(names(Coefficients.NonZero), colnames(Prognostic))
}
Lambda <- Lambda.Sequence[Lambda.Index + Lambda.Index.Add]
n.g[i] <- length(Selected.mets)
}
lambda[i]<-Lambda
met.mat[i,is.element(rownames(Mdata),Selected.mets)]<-1
coef.mat[i,is.element(rownames(Mdata),Selected.mets)]<-Coefficients.NonZero[Selected.mets]
scores.train <- as.vector(Coefficients.NonZero[Selected.mets] %*% t(Data[cv.train[i,],Selected.mets]))
scores.test <- as.vector(Coefficients.NonZero[Selected.mets] %*% t(Data[cv.test[i,],Selected.mets]))
#######################
## train set ###########
Sdata<-data.frame(Survival=Survival[cv.train[i,]],Censor=Censor[cv.train[i,]])
if (!is.null(Prognostic)) perPrognostic<-as.data.frame(Prognostic[cv.train[i,],])
colnames(perPrognostic) <- colnames(Prognostic)
Results1<-EstimateHR(Risk.Scores=scores.train, Data.Survival =Sdata, Prognostic = perPrognostic, Plots = FALSE, Quantile = Quantile)
if (!is.null(Prognostic)) HRTrain[i,]<-(summary(Results1$SurvResult)[[8]])[1,][-2]
if ( is.null(Prognostic)) HRTrain[i,]<- summary(Results1$SurvResult)[[8]][-2]
#######################
## test set ###########
Sdata<-data.frame(Survival=Survival[cv.test[i,]],Censor=Censor[cv.test[i,]])
if (!is.null(Prognostic)) perPrognostic<-as.data.frame(Prognostic[cv.test[i,],])
colnames(perPrognostic) <- colnames(Prognostic)
Results2<-EstimateHR(Risk.Scores =scores.test, Data.Survival = Sdata, Prognostic = perPrognostic, Plots = FALSE, Quantile = Quantile)
if (!is.null(Prognostic)) HRTest[i,]<-(summary(Results2$SurvResult)[[8]])[1,][-2]
if ( is.null(Prognostic)) HRTest[i,]<- summary(Results2$SurvResult)[[8]][-2]
}
return(cvle(Coef.mat=coef.mat,Runtime=Run.Time,
lambda=lambda,n=n.g,Met.mat=met.mat,
HRTrain=HRTrain,HRTest=HRTest,pld=pld,
Mdata=Mdata))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.