#' Random split for glmnet
#'
#' Computes random split validation for glmnet, produces a plot, and returns a value for lambda
#' @import zeallot
#' @param X is the dataset matrix with samples on rows and features on columns.
#' @param y is the numeric vector of response variables.
#' @param type.transformation is a string indicating the type of transformation to be performed at the data before fitting the model.
#' It can be one of the following: "none","abs","abs.alpha.power","mul","log.abs","log.abs.alpha.power"; none is the default parameter.
#' @param alpha_values is a numeric vector of alpha value to be used in the abs.alpha.power transformation. Default value is c(0.1,0.25,0.5,0.75,1,1.25,1.5,1.75,2)
#' @param cost is a numeric constant used when type.transformation = mul.
#' @param bb is the base of the logarithm used when type.transformation = log.abs or log.abs.alpha.power
#' @param type.solution is a string indicating the type of solution to compute. Possible values are: min, uci and lci; if standard min or max of the average CV function.
#' if uci, take the the most parsimonous solution within the tot percentage of confidence bands around the standard solution. if lci a less partimosious solution within the tot percentage of confidence bands around the standard solution is selected
#' @param measure is the measure used to perform the choice of the optimal lambda value. Possible values are MSE and R2. Default value is MSE
#' @param intercept is a boolean valus indicating if we want to fit or not the intercept. Default valuw is TRUE
#' @param th is the size of the confidence interval. Default value is 1.96
#' @param cor_th is the maximum accepted correlation between couple of features. Default value is 0.95
#' @param zero_th is the maximum percentage of zeros accepted in a feature. Default value is 0.8
#' @param n.splits is the number of random split to be performed. Default value is 25
#' @param inner.train.prop is the percentage of samples from the train test to be used as training set in the random-split method. Default value is 0.9 (90)
#' @param nLambda number of lambda to be tested in the LASSO model
#' @param n.splits.val is the number of random split to compute validation metrics. Default value is 25
#' @param n.splits.scr is the number of random split to perform the y-scrambling test. Default value is 25
#' @param nCores is the number of cores to be used
#' @return an object of class tqsar containing a list of final models,
#' a list of williams plot object
#' a dataframe with all the metrices computed for the different models, and
#' a list of lambda values used to train the LASSO model.
#' \item{finalModels}{a list with one or more models coming from the RCVLasso function}
#' \item{williams_plots}{a list of one or more williams plot objects}
#' \item{Metrics}{a dataframe with all the internal and external metrics estimated for every model}
#' \item{lambda}{a list of one or more vector of lambda used to tune the LASSO models}
#' @keywords qsar
#' @export
tqsar = function(X_train,X_test,y_train,y_test,type.transformation = "none",alpha_values = c(0.1,0.25,0.5,0.75,1,1.25,1.5,1.75,2),
cost=3, bb = 2, type.solution="lci", measure="MSE", intercept= TRUE, th = 1.96,
cor_th=.95, zero_th=.8,
n.splits = 10,inner.train.prop=0.9, nLambda = 100,n.splits.val=25,n.splits.scr=25,
nCores = 10,md_list = c(),gene_list = c()){
if(!type.transformation %in% c("none","abs","abs.alpha.power","mul","log.abs","log.abs.alpha.power"))
stop("'type.transformation' must be one of the following: none,abs,abs.alpha.power,mul,log.abs,log.abs.alpha.power")
if(!measure %in% c("MSE","R2"))
stop("'measure' must be one of the following: 'MSE','R2'")
if(!type.solution %in% c("uci","lci","min"))
stop("'type.solution' must be one of the following: 'uci','lci','min'")
if(cor_th < 0 | cor_th >1 ) stop("'cor_th' must be a number between 0 and 1")
if(zero_th < 0 | zero_th >1 ) stop("'zero_th' must be a number between 0 and 1")
#if(p_train < 0 | p_train >1 ) stop("'p_train' must be a number between 0 and 1")
if(inner.train.prop < 0 | inner.train.prop >1 ) stop("'inner.train.prop' must be a number between 0 and 1")
set.seed(1)
# training/test split
#c(X_train,X_test,y_train,y_test,xxx) %<-% training_test_split(X,y,p_train)
if(type.transformation == "none" | type.transformation == "abs" | type.transformation == "mul" | type.transformation == "log.abs"){
X_train_t = transform_data(X_train,type.transformation, cost = cost,bb = bb)
X_test_t = transform_data(X_test,type.transformation, cost = cost, bb=bb)
L <- lambda.grid(y=y_train, x=as.matrix(X_train_t), nlambda = nLambda)
u2f <- RCVLassoPar(y = y_train , x = as.matrix(X_train_t) , lambda = L, n.splits = n.splits , train.prop = inner.train.prop,
intercept = intercept , type.solution = type.solution,measure = measure,nCores = nCores,th = th)
c(Me, WP,y_pred_test,y_pred_train) %<-% evaluating_model(bm = u2f, X_train = as.matrix(X_train_t), X_test = as.matrix(X_test_t), y_train, y_test, md_list=md_list, gene_list=gene_list,inner.train.prop,nPermValm = n.splits.val,nPermScr = n.splits.scr)
final_models = list(u2f=u2f)
WP_list = list(WP=WP)
lambda_list = list(L=L)
y_pred_tr_list = list(y_pred_train=y_pred_train)
y_pred_te_list = list(y_pred_test=y_pred_test)
Metrics = Me
}
if(type.transformation == "abs.alpha.power" | type.transformation == "log.abs.alpha.power"){
# Metrics = data.frame(nMD = 0, nGE = 0, MSE_tr = 0, MSE_te=0,R2_tr = 0,R2_te = 0,
# Q2 = 0,CCC = 0,Q2F1 = 0,Q2F2 = 0, Q2F3 = 0, rm2 = 0,
# R2Yscr= 0,Q2Yscr= 0,AD_train= 0,AD_test = 0, MDlist = "mdlist",GElist = "gelist",Intercept = 0, coefficients = "coeff")
Metrics = data.frame(nMD = 0, nGE = 0, MSE_tr = 0, MSE_te=0,R2_tr = 0,R2_te = 0,
Q2 = 0,CCC = 0,Q2F1 = 0,Q2F2 = 0, Q2F3 = 0, #rm2 = 0,
rm2 = 0,drm2 = 0,trm1=0, k=0,trm2=0,k1=0,trm3=0,
R2Yscr= 0,Q2Yscr= 0,AD_train= 0,AD_test = 0, MDlist = "", GElist = "",Intercept = 0, coefficients = "")
final_models = list()
WP_list = list()
lambda_list = list()
y_pred_tr_list = list()
y_pred_te_list = list()
for(a in 1:length(alpha_values)){
print(a)
#X_train_t = transform_data(X,type.transformation, power = alpha_values[a], bb=bb)
X_train_t = transform_data(X_train,type.transformation, power = alpha_values[a], bb=bb)
X_test_t = transform_data(X_test,type.transformation, power = alpha_values[a], bb=bb)
L <- lambda.grid(y=y_train, x=as.matrix(X_train_t), nlambda = nLambda)
u2f <- RCVLassoPar(y = y_train , x = X_train_t , lambda = L, n.splits = n.splits , train.prop = inner.train.prop,
intercept = intercept , type.solution = type.solution,measure = measure,nCores = nCores,th = th)
c(Me, WP,y_pred_test,y_pred_train) %<-% evaluating_model(bm = u2f, X_train = as.matrix(X_train_t), X_test = as.matrix(X_test_t), y_train, y_test, md_list=md_list, gene_list=gene_list,inner.train.prop,nPermValm = n.splits.val,nPermScr = n.splits.scr)
colnames(Me) = colnames(Metrics)
Metrics = rbind(Metrics, Me)
WP_list[[a]] = WP
final_models[[a]] = u2f
lambda_list[[a]] = L
y_pred_tr_list[[a]] = y_pred_train
y_pred_te_list[[a]] = y_pred_test
} #end for alpha
Metrics = Metrics[-1,]
} #end if type.transformation == "abs.alpha.power"
## Output
ans <- list()
ans$finalModels <- final_models
ans$williams_plots <- WP_list
ans$Metrics <- Metrics
ans$lambda <- lambda_list
ans$pred_tr <- y_pred_tr_list
ans$pred_te <- y_pred_te_list
ans$X_train <- X_train
ans$X_test <- X_test
ans$y_train <- y_train
ans$y_test <- y_test
ans$test.idx <- test.idx
class(ans) <- 'tqsar'
return(ans)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.