#' Random split for glmnet
#'
#' Computes random split validation for glmnet, produces a plot, and returns a value for lambda
#' @import zeallot
#' @param X_md is the dataset matrix with samples on rows and MDs on columns.
#' @param X_ge is the dataset matrix with samples on rows and genes on columns.
#' @param y is the numeric vector of response variables.
#' @param type.transformation is a string indicating the type of transformation to be performed at the data before fitting the model.
#' It can be one of the following: "none","abs","abs.alpha.power","mul","log.abs","log.abs.alpha.power"; none is the default parameter.
#' @param alpha_values is a numeric vector of alpha value to be used in the abs.alpha.power transformation. Default value is c(0.1,0.25,0.5,0.75,1,1.25,1.5,1.75,2)
#' @param cost is a numeric constant used when type.transformation = mul.
#' @param bb is the base of the logarithm used when type.transformation = log.abs or log.abs.alpha.power
#' @param type.solution is a string indicating the type of solution to compute. Possible values are: min, uci and lci; if standard min or max of the average CV function.
#' if uci, take the the most parsimonous solution within the tot percentage of confidence bands around the standard solution. if lci a less partimosious solution within the tot percentage of confidence bands around the standard solution is selected
#' @param measure is the measure used to perform the choice of the optimal lambda value. Possible values are MSE and R2. Default value is MSE
#' @param intercept is a boolean valus indicating if we want to fit or not the intercept. Default valuw is TRUE
#' @param th is the size of the confidence interval. Default value is 1.96
#' @param n.splits is the number of random split to be performed. Default value is 25
#' @param inner.train.prop is the percentage of samples from the train test to be used as training set in the random-split method. Default value is 0.9 (90)
#' @param nLambda number of lambda to be tested in the LASSO model
#' @param n.splits.val is the number of random split to compute validation metrics. Default value is 25
#' @param n.splits.scr is the number of random split to perform the y-scrambling test. Default value is 25
#' @param nCores is the number of cores to be used
#' @return an object of class tqsar containing a list of final models,
#' a list of williams plot object
#' a dataframe with all the metrices computed for the different models, and
#' a list of lambda values used to train the LASSO model.
#' \item{finalModels}{a list with one or more models coming from the RCVLasso function}
#' \item{williams_plots}{a list of one or more williams plot objects}
#' \item{Metrics}{a dataframe with all the internal and external metrics estimated for every model}
#' \item{lambda}{a list of one or more vector of lambda used to tune the LASSO models}
#' @keywords qsar
#' @export
hqsar = function(X_md_train,X_md_test, X_ge_train, X_ge_test,y_train, y_test,
type.transformation.md = "none",type.transformation.ge = "none",
alpha = c(0.1,0.25,0.5,0.75,1,1.25,1.5,1.75,2),
alpha1 = c(0.1,0.25,0.5,0.75,1,1.25,1.5,1.75,2),
cost.md=3,cost.ge=3, bb.md = 2,bb.ge=2, type.solution="lci", measure="MSE", intercept= TRUE, th = 1.96,
n.splits = 99,inner.train.prop=0.9, nLambda = 100, n.splits.val=25, n.splits.scr=25,
nCores = 30){
if(!type.transformation.md %in% c("none","abs","abs.alpha.power","log.abs","log.abs.alpha.power")) #mul
stop("'type.transformation.md' must be one of the following: none,abs,abs.alpha.power,log.abs,log.abs.alpha.power")
if(!type.transformation.ge %in% c("none","abs","abs.alpha.power","log.abs","log.abs.alpha.power")) #mul
stop("'type.transformation.ge' must be one of the following: none,abs,abs.alpha.power,log.abs,log.abs.alpha.power")
if(!measure %in% c("MSE","R2"))
stop("'measure' must be one of the following: 'MSE','R2'")
if(!type.solution %in% c("uci","lci","min"))
stop("'type.solution' must be one of the following: 'uci','lci','min'")
if(inner.train.prop < 0 | inner.train.prop >1 ) stop("'inner.train.prop' must be a number between 0 and 1")
set.seed(1)
md_list = colnames(X_md)
gene_list = colnames(X_ge)
if(type.transformation.ge == type.transformation.md){
type.transformation = type.transformation.ge
print("Apply Same Transformation")
if(type.transformation == "none" | type.transformation == "abs" | type.transformation == "mul" | type.transformation == "log.abs"){
X_train_t = cbind(transform_data(X_md_train,type.transformation.md, bb=bb.md,cost = cost.md),
transform_data(X_ge_train,type.transformation.ge, bb=bb.ge,cost = cost.ge))
X_test_t = cbind(transform_data(X_md_test,type.transformation.md, bb=bb.md,cost = cost.md),
transform_data(X_ge_test,type.transformation.ge, bb=bb.ge,cost = cost.ge))
L <- lambda.grid(y=y_train, x=as.matrix(X_train_t), nlambda = nLambda)
u2f <- RCVLassoPar(y = y_train , x = as.matrix(X_train_t) , lambda = L, n.splits = n.splits , train.prop = inner.train.prop,
intercept = intercept , type.solution = type.solution,measure = measure,nCores = nCores,th = th)
c(Me, WP,y_pred_test,y_pred_train) %<-% evaluating_model(bm = u2f, X_train = as.matrix(X_train_t), X_test = as.matrix(X_test_t), y_train, y_test, md_list=md_list, gene_list=gene_list,inner.train.prop,nPermValm = n.splits.val,nPermScr = n.splits.scr)
final_models = list(u2f=u2f)
WP_list = list(WP=WP)
lambda_list = list(L=L)
y_pred_tr_list = list(y_pred_train=y_pred_train)
y_pred_te_list = list(y_pred_test=y_pred_test)
Metrics = Me
}
if(type.transformation == "abs.alpha.power" | type.transformation == "log.abs.alpha.power"){
Metrics = data.frame(md_pow = 0, ge_pow = 0,nMD = 0, nGE = 0, MSE_tr = 0, MSE_te=0,R2_tr = 0,R2_te = 0,
Q2 = 0,CCC = 0,Q2F1 = 0,Q2F2 = 0, Q2F3 = 0, #rm2 = 0,
rm2 = 0,dr2m = 0,trm1=0, k=0,trm2=0,k1=0,trm3=0,
R2Yscr= 0,Q2Yscr= 0,AD_train= 0,AD_test = 0, MDlist = "", GElist = "",Intercept = 0, coefficients = "")
final_models = list()
WP_list = list()
lambda_list = list()
y_pred_tr_list = list()
y_pred_te_list = list()
for(b in 1:length(alpha1)){
print(b)
for(a in 1:length(alpha)){
print(a)
X_train_t = cbind(transform_data(X_md_train,type.transformation.md, power = alpha1[b], bb=bb.md,cost = cost.md),
transform_data(X_ge_train,type.transformation.ge, power = alpha[a], bb=bb.ge,cost = cost.ge))
X_test_t = cbind(transform_data(X_md_test,type.transformation.md, power = alpha1[b], bb=bb.md,cost = cost.md),
transform_data(X_ge_test,type.transformation.ge, power = alpha[a], bb=bb.ge,cost = cost.ge))
L <- lambda.grid(y=y_train, x=X_train_t, nlambda = nLambda)
u2f <- RCVLassoPar(y = y_train , x = X_train_t , lambda = L, n.splits = n.splits , train.prop = inner.train.prop,
intercept = intercept , type.solution = type.solution,measure = measure,nCores = nCores,th = th)
c(Me, WP,y_pred_test,y_pred_train) %<-% evaluating_model(bm = u2f, X_train = as.matrix(X_train_t), X_test = as.matrix(X_test_t),
y_train, y_test, md_list=md_list, gene_list=gene_list,inner.train.prop,
nPermValm = n.splits.val,nPermScr = n.splits.scr)
i.pows = data.frame(md_pow=alpha1[b], ge_pow=alpha[a])
Me = cbind(i.pows,Me)
colnames(Metrics) = colnames(Me)
Metrics = rbind(Metrics, Me)
WP_list[[paste(alpha1[b],"-",alpha[a],sep="")]] = WP
final_models[[paste(alpha1[b],"-",alpha[a],sep="")]] = u2f
lambda_list[[paste(alpha1[b],"-",alpha[a],sep="")]] = L
y_pred_tr_list[[paste(alpha1[b],"-",alpha[a],sep="")]] = y_pred_train
y_pred_te_list[[paste(alpha1[b],"-",alpha[a],sep="")]] = y_pred_test
} #end for alpha
}
Metrics = Metrics[-1,]
} #end if type.transformation == "abs.alpha.power"
}else{
print("Apply different Transformation")
if(type.transformation.md %in% c("none","abs","mul","log.abs") & type.transformation.ge %in% c("none","abs","mul","log.abs")){
print(paste("MD transformation",type.transformation.md))
print(paste("GE transformation",type.transformation.ge))
X_train_t = cbind(transform_data(X_md_train,type.transformation.md, bb=bb.md,cost = cost.md),
transform_data(X_ge_train,type.transformation.ge, bb=bb.ge,cost = cost.ge))
X_test_t = cbind(transform_data(X_md_test,type.transformation.md, bb=bb.md,cost = cost.md),
transform_data(X_ge_test,type.transformation.ge, bb=bb.ge,cost = cost.ge))
L <- lambda.grid(y=y_train, x=as.matrix(X_train_t), nlambda = nLambda)
u2f <- RCVLassoPar(y = y_train , x = as.matrix(X_train_t) , lambda = L, n.splits = n.splits , train.prop = inner.train.prop,
intercept = intercept , type.solution = type.solution,measure = measure,nCores = nCores,th = th)
c(Me, WP,y_pred_test,y_pred_train) %<-% evaluating_model(bm = u2f, X_train = as.matrix(X_train_t), X_test = as.matrix(X_test_t), y_train, y_test, md_list=md_list, gene_list=gene_list,inner.train.prop,nPermValm = n.splits.val,nPermScr = n.splits.scr)
final_models = list(u2f=u2f)
WP_list = list(WP=WP)
lambda_list = list(L=L)
y_pred_tr_list = list(y_pred_train=y_pred_train)
y_pred_te_list = list(y_pred_test=y_pred_test)
Metrics = Me
}
if((type.transformation.md %in% c("none","abs","mul","log.abs") & type.transformation.ge %in% c("abs.alpha.power","log.abs.alpha.power")) |
(type.transformation.ge %in% c("none","abs","mul","log.abs") & type.transformation.md %in% c("abs.alpha.power","log.abs.alpha.power"))){
print(paste("MD transformation",type.transformation.md))
print(paste("GE transformation",type.transformation.ge))
x1 = c(ge = type.transformation.ge,md=type.transformation.md)
xx1 = names(x1)[which(x1 %in% c("abs.alpha.power","log.abs.alpha.power"))]
Metrics = data.frame(md_pow = 0, ge_pow = 0,nMD = 0, nGE = 0, MSE_tr = 0, MSE_te=0,R2_tr = 0,R2_te = 0,
Q2 = 0,CCC = 0,Q2F1 = 0,Q2F2 = 0, Q2F3 = 0, #rm2 = 0,
rm2 = 0,dr2m = 0,trm1=0, k=0,trm2=0,k1=0,trm3=0,
R2Yscr= 0,Q2Yscr= 0,AD_train= 0,AD_test = 0, MDlist = "", GElist = "",Intercept = 0, coefficients = "")
final_models = list()
WP_list = list()
lambda_list = list()
y_pred_tr_list = list()
y_pred_te_list = list()
for(a in 1:length(alpha)){
X_train_t = cbind(transform_data(X_md_train,type.transformation.md, bb=bb.md,cost = cost.md),
transform_data(X_ge_train,type.transformation.ge, bb=bb.ge,cost = cost.ge))
X_test_t = cbind(transform_data(X_md_test,type.transformation.md, bb=bb.md,cost = cost.md),
transform_data(X_ge_test,type.transformation.ge, bb=bb.ge,cost = cost.ge))
L <- lambda.grid(y=y_train, x=as.matrix(X_train_t), nlambda = nLambda)
u2f <- RCVLassoPar(y = y_train , x = X_train_t , lambda = L, n.splits = n.splits , train.prop = inner.train.prop,
intercept = intercept , type.solution = type.solution,measure = measure,nCores = nCores,th = th)
c(Me, WP,y_pred_test,y_pred_train) %<-% evaluating_model(bm = u2f, X_train = as.matrix(X_train_t),
X_test = as.matrix(X_test_t), y_train, y_test, md_list=md_list,
gene_list=gene_list,inner.train.prop,nPermValm = n.splits.val,nPermScr = n.splits.scr)
if(xx1 == "ge"){
i.pows = data.frame(md_pow="", ge_pow=alpha[a])
}else{
i.pows = data.frame(md_pow=alpha[a], ge_pow="")
}
Me = cbind(i.pows,Me)
colnames(Metrics) = colnames(Me)
Metrics = rbind(Metrics, Me)
WP_list[[a]] = WP
final_models[[a]] = u2f
lambda_list[[a]] = L
y_pred_tr_list[[a]] = y_pred_train
y_pred_te_list[[a]] = y_pred_test
} #end for alpha
Metrics = Metrics[-1,]
}
if(type.transformation.md %in% c("abs.alpha.power","log.abs.alpha.power") & type.transformation.ge %in% c("abs.alpha.power","log.abs.alpha.power")){
print(paste("MD transformation",type.transformation.md))
print(paste("GE transformation",type.transformation.ge))
Metrics = data.frame(md_pow = 0, ge_pow = 0,nMD = 0, nGE = 0, MSE_tr = 0, MSE_te=0,R2_tr = 0,R2_te = 0,
Q2 = 0,CCC = 0,Q2F1 = 0,Q2F2 = 0, Q2F3 = 0, #rm2 = 0,
rm2 = 0,dr2m = 0,trm1=0, k=k,trm2=0,k1=0,trm3=0,
R2Yscr= 0,Q2Yscr= 0,AD_train= 0,AD_test = 0, MDlist = "", GElist = "",Intercept = 0, coefficients = "")
final_models = list()
WP_list = list()
lambda_list = list()
y_pred_tr_list = list()
y_pred_te_list = list()
for(b in 1:length(alpha1)){
print(b)
for(a in 1:length(alpha)){
print(a)
X_train_t = cbind(transform_data(X_md_train,type.transformation.md, power = alpha1[b], bb=bb.md,cost = cost.md),
transform_data(X_ge_train,type.transformation.ge, power = alpha[a], bb=bb.ge,cost = cost.ge))
X_test_t = cbind(transform_data(X_md_test,type.transformation.md, power = alpha1[b], bb=bb.md,cost = cost.md),
transform_data(X_ge_test,type.transformation.ge, power = alpha[a], bb=bb.ge,cost = cost.ge))
L <- lambda.grid(y=y_train, x=X_train_t, nlambda = nLambda)
u2f <- RCVLassoPar(y = y_train , x = X_train_t , lambda = L, n.splits = n.splits , train.prop = inner.train.prop,
intercept = intercept , type.solution = type.solution,measure = measure,nCores = nCores,th = th)
c(Me, WP,y_pred_test,y_pred_train) %<-% evaluating_model(bm = u2f, X_train = as.matrix(X_train_t), X_test = as.matrix(X_test_t),
y_train, y_test, md_list=md_list, gene_list=gene_list,inner.train.prop,
nPermValm = n.splits.val,nPermScr = n.splits.scr)
i.pows = data.frame(md_pow=alpha1[b], ge_pow=alpha[a])
Me = cbind(i.pows,Me)
colnames(Metrics) = colnames(Me)
Metrics = rbind(Metrics, Me)
WP_list[[paste(alpha1[b],"-",alpha[a],sep="")]] = WP
final_models[[paste(alpha1[b],"-",alpha[a],sep="")]] = u2f
lambda_list[[paste(alpha1[b],"-",alpha[a],sep="")]] = L
y_pred_tr_list[[paste(alpha1[b],"-",alpha[a],sep="")]] = y_pred_train
y_pred_te_list[[paste(alpha1[b],"-",alpha[a],sep="")]] = y_pred_test
} #end for alpha
}
Metrics = Metrics[-1,]
}
}
## Output
ans <- list()
ans$finalModels <- final_models
ans$williams_plots <- WP_list
ans$Metrics <- Metrics
ans$lambda <- lambda_list
ans$pred_tr <- y_pred_tr_list
ans$pred_te <- y_pred_te_list
ans$X_train <- X_train
ans$X_test <- X_test
ans$y_train <- y_train
ans$y_test <- y_test
#ans$test.idx <- test.index
class(ans) <- 'hqsar'
return(ans)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.