Nothing
checkColnamesIllegalChars.mb <- function(X){
for(block in names(X)){
new_cn_X <- deleteIllegalChars(colnames(X[[block]]))
new_cn_X <- transformIllegalChars(new_cn_X)
if(length(unique(new_cn_X)) == length(unique(colnames(X[[block]])))){
colnames(X[[block]]) <- new_cn_X
}else{
stop(paste0("When deleting illegal chars, some colnames in X (", block,") get the same name. Update manually the colnames to avoid the next chars: ", paste0(pkg.env$IllegalChars, collapse = " ")))
}
}
return(X)
}
#' getEPV.mb
#' @description Provides a quantitative assessment of the dataset by computing the Events per Variable
#' (EPV) metric for multi-block data, which gauges the proportionality between observed events and the
#' number of explanatory variables.
#'
#' @details In the realm of survival analysis, the balance between observed events and explanatory
#' variables is paramount. The `getEPV` function serves as a tool for researchers to ascertain this
#' balance, which can be pivotal in determining the robustness and interpretability of subsequent
#' statistical models. By evaluating the ratio of events in the `Y` matrix to the variables in the `X`
#' matrix, the function yields the EPV metric. It is of utmost importance that the `Y` matrix encompasses
#' two distinct columns, namely "time" and "event". The latter, "event", should strictly encapsulate
#' binary values, delineating censored (either 0 or FALSE) and event (either 1 or TRUE) observations.
#' To ensure the integrity of the data and the precision of the computation, the function is equipped
#' with an error mechanism that activates if the "event" column remains undetected.
#' @param X List of numeric matrices or data.frames. Explanatory variables. Qualitative variables must be transform
#' into binary variables.
#' @param Y Numeric matrix or data.frame. Response variables. Object must have two columns named as
#' "time" and "event". For event column, accepted values are: 0/1 or FALSE/TRUE for censored and event
#' observations.
#'
#' @return Return the EPV value for a specific X (explanatory variables) and Y (time and censored variables) data.
#'
#' @author Pedro Salguero Garcia. Maintainer: pedsalga@upv.edu.es
#'
#' @export
#'
#' @examples
#' data("X_multiomic")
#' data("Y_multiomic")
#' X <- X_multiomic
#' Y <- Y_multiomic
#' getEPV.mb(X,Y)
getEPV.mb <- function(X,Y){
EPV_lst <- list()
for(b in names(X)){
if("event" %in% colnames(Y)){
EPV_lst[[b]] <- getEPV(X[[b]], Y)
}else{
stop("Column event has not been detected in Y matrix.")
}
}
return(EPV_lst)
}
#' deleteZeroOrNearZeroVariance.mb
#' @description Provides a robust mechanism to filter out variables from a dataset that exhibit zero
#' or near-zero variance, thereby enhancing the quality and interpretability of subsequent statistical
#' analyses.
#'
#' @details The `deleteZeroOrNearZeroVariance` function is an indispensable tool in the preprocessing
#' phase of statistical modeling. In many datasets, especially high-dimensional ones, certain variables
#' might exhibit zero or near-zero variance. Such variables can be problematic as they offer limited
#' information variance and can potentially distort the results of statistical models, leading to
#' issues like overfitting. By leveraging the `caret::nearZeroVar()` function, this tool offers a
#' rigorous method to identify and exclude these variables. Users are afforded flexibility in their
#' choices, with options to remove only zero variance variables, near-zero variance variables, or
#' both. The function also provides the capability to set a frequency cutoff, `freqCut`, which
#' determines the threshold for near-zero variance based on the ratio of the most frequent value to
#' the second most frequent value. For scenarios where certain variables are deemed essential and
#' should not be removed regardless of their variance, the `toKeep.zv` parameter allows users to specify
#' a list of such variables.
#' @param X List of numeric matrices or data.frame. Explanatory variables. Qualitative variables must
#' be transform into binary variables.
#' @param remove_near_zero_variance Logical. If remove_near_zero_variance = TRUE, near zero variance
#' variables will be removed (default: TRUE).
#' @param remove_zero_variance Logical. If remove_zero_variance = TRUE, zero variance variables will
#' be removed (default: FALSE).
#' @param toKeep.zv Character vector. Name of variables in X to not be deleted by (near) zero variance
#' filtering (default: NULL).
#' @param freqCut Numeric. Cutoff for the ratio of the most common value to the second most common
#' value (default: 95/5).
#'
#' @return A list of two objects.
#' \code{X}: A list with as many blocks as X input, but with the variables filtered.
#' \code{variablesDeleted}: A list with as many blocks as X input, with the name of the variables
#' that have been removed.
#'
#' @author Pedro Salguero Garcia. Maintainer: pedsalga@upv.edu.es
#'
#' @export
#'
#' @examples
#' data("X_multiomic")
#' X <- X_multiomic
#' filter <- deleteZeroOrNearZeroVariance.mb(X, remove_near_zero_variance = TRUE)
deleteZeroOrNearZeroVariance.mb <- function(X, remove_near_zero_variance = TRUE,
remove_zero_variance = FALSE,
toKeep.zv = NULL, freqCut = 95/5){
auxX <- X
variablesDeleted <- NULL
perc_unique_values <- NULL
if(remove_near_zero_variance){
lst.zv <- purrr::map(auxX, ~deleteZeroVarianceVariables(data = ., info = TRUE, mustKeep = toKeep.zv, freqCut = freqCut))
#deleteZeroVarianceVariables(data = auxX$rnaseq, info = TRUE, mustKeep = toKeep.zv, freqCut = freqCut)
variablesDeleted <- purrr::map(lst.zv, ~.$variablesDeleted[,1])
perc_unique_values <- purrr::map(lst.zv, ~.$perc_unique_values[,2,drop=F])
if(any(unlist(lapply(variablesDeleted, is.null)))){ #if any not null
for(n in names(variablesDeleted)){
if(is.null(variablesDeleted[[n]])){
next
}else{
auxX[[n]] <- auxX[[n]][,!colnames(auxX[[n]]) %in% variablesDeleted[[n]]]
}
}
}
}else if(remove_zero_variance){
lst.zv <- purrr::map(auxX, ~deleteZeroVarianceVariables(data = ., info = TRUE, mustKeep = toKeep.zv, onlyZero = TRUE))
variablesDeleted <- purrr::map(lst.zv, ~.$variablesDeleted[,1])
perc_unique_values <- purrr::map(lst.zv, ~.$perc_unique_values[,2,drop=F])
if(any(unlist(lapply(variablesDeleted, is.null)))){ #if any not null
for(n in names(variablesDeleted)){
if(is.null(variablesDeleted[[n]])){
next
}else{
auxX[[n]] <- auxX[[n]][,!colnames(auxX[[n]]) %in% variablesDeleted[[n]]]
}
}
}
}
return(list(X = auxX, variablesDeleted = variablesDeleted, perc_unique_values = perc_unique_values))
}
#' deleteNearZeroCoefficientOfVariation.mb
#' @description Filters out variables from a dataset that exhibit a coefficient of variation below a
#' specified threshold, ensuring the retention of variables with meaningful variability.
#'
#' @details The `deleteNearZeroCoefficientOfVariation` function is a pivotal tool in data preprocessing,
#' especially when dealing with high-dimensional datasets. The coefficient of variation (CoV) is a
#' normalized measure of data dispersion, calculated as the ratio of the standard deviation to the mean.
#' In many scientific investigations, variables with a low CoV might be considered as offering limited
#' discriminative information, potentially leading to noise in subsequent statistical analyses. By
#' setting a threshold through the `LIMIT` parameter, this function provides a systematic approach to
#' identify and exclude variables that do not meet the desired variability criteria. The underlying
#' rationale is that variables with a CoV below the set threshold might not contribute significantly
#' to the variability of the dataset and could be redundant or even detrimental for certain analyses.
#' The function returns a modified dataset, a list of deleted variables, and the computed coefficients
#' of variation for each variable. This comprehensive output ensures that researchers are well-informed
#' about the preprocessing steps and can make subsequent analytical decisions with confidence.
#'
#' @param X List of numeric matrices or data.frames. Explanatory variables. Qualitative variables must
#' be transform into binary variables.
#' @param LIMIT Numeric. Cutoff for minimum variation. If coefficient is lesser than the limit, the
#' variables are removed because not vary enough (default: 0.1).
#'
#' @return A list of three objects.
#' \code{X}: A list with as many blocks as X input, but with the variables filtered.
#' \code{variablesDeleted}: A list with as many blocks as X input, with the name of the variables that have been removed.
#' \code{coeff_variation}: A list with as many blocks as X input, with the coefficient of variation per variable.
#'
#' @author Pedro Salguero Garcia. Maintainer: pedsalga@upv.edu.es
#'
#' @export
#'
#' @examples
#' data("X_multiomic")
#' X <- X_multiomic
#' filter <- deleteNearZeroCoefficientOfVariation.mb(X, LIMIT = 0.1)
#'
deleteNearZeroCoefficientOfVariation.mb <- function(X, LIMIT = 0.1){
variablesDeleted <- list()
coef_list <- list()
newX <- X
for(b in names(newX)){
cvar <- apply(newX[[b]], 2, function(x){sd(x)/abs(mean(x))})
variablesDeleted[[b]] <- names(cvar)[which(cvar < LIMIT)]
if(length(variablesDeleted[[b]])>0){
newX[[b]] <- newX[[b]][,!colnames(newX[[b]]) %in% variablesDeleted[[b]]]
}else{
variablesDeleted[[b]] <- 0
}
coef_list[[b]] <- cvar
}
return(list("X" = newX, "variablesDeleted" = variablesDeleted, "coeff_variation" = coef_list))
}
checkX.colnames.mb <- function(X){
for(b in names(X)){
if(is.null(colnames(X[[b]]))){
stop("X matrix/data.frame must contain colnames in all of its blocks.")
}
}
}
checkXY.rownames.mb <- function(X, Y, verbose = TRUE){
# Check if X and Y are matrices
if (!isa(X, "list")){
if(verbose){
stop("X data is not a list\n")
}
X <- data.matrix(X)
}else{
for(b in names(X)){
if(!nrow(X[[b]])==nrow(Y)){
stop(paste0("X[",b,"]", " and Y have different number of observations."))
}
if(is.null(rownames(X[[b]])) & is.null(rownames(Y))){
if(verbose){
message(paste0("Rownames of X[",b,"] and Y are NULL. Named from 1 to ", nrow(X[[b]]), "."))
}
rownames(Y) <- rownames(X[[b]]) <- 1:nrow(X[[b]])
}else if(is.null(rownames(X[[b]]))){
rownames(X[[b]]) <- rownames(Y)
}else{
rownames(Y) <- rownames(X[[b]])
}
}
}
return(list(X = X, Y = Y))
}
checkXY.mb.class <- function(X, Y, verbose = TRUE){
# Check if X and Y are matrices
if (!isa(X, "list")){
if(verbose){
stop("X data is not a list\n")
}
X <- data.matrix(X)
}else{
if(all(unlist(lapply(X, class)) %in% "data.frame")){
X <- lapply(X, data.matrix)
}else if(all(unlist(lapply(X, function(x){class(x)[[1]]})) %in% c("data.frame", "matrix"))){
for(block in names(X)){
if(is.data.frame(X[[block]])[[1]]){
X[[block]] <- data.matrix(X[[block]])
}
}
}else if(!all(unlist(lapply(X, class)) %in% c("matrix", "array"))){
stop("X elements are not a matrix or a data.frame\n")
}
}
if (!is.matrix(Y)) {
if(is.data.frame(Y)){
if(verbose){
message("Y data is not a matrix, applying data.matrix\n")
}
Y <- data.matrix(Y)
}else{
stop("Y data is not a matrix or a data.frame")
}
}
for(i in names(X)){
if(any(rowSums(is.na(X[[i]]))>0)){
stop(paste0("Block ", i, " have observations with NAs. MB:PLS models cannot manage NAs yet."))
}
}
return(list(X = X, Y = Y))
}
check.mb.ncomp <- function(X, max.ncomp, verbose = FALSE){
if(length(max.ncomp)>1){
stop("max.ncomp must be a number. Not a vector.")
}
d <- min(unlist(lapply(X, ncol)))
if(max.ncomp > d){
if(verbose){
message(paste0("Number of components used by all blocks cannot be grater than the number of variables. Updated to ", ncol(X), "."))
}
max.ncomp <- d
}
return(max.ncomp)
}
check.mb.maxPredictors <- function(X, Y, MIN_EPV, max.variables, verbose = FALSE){
max_n_predictors <- purrr::map(X, ~getMaxNPredictors(n.var = ncol(.), Y, MIN_EPV))
max_n_predictors <- max(unlist(max_n_predictors))
max.variables.res <- max.variables
if(max_n_predictors==0){
stop_quietly(paste0("Less than ", MIN_EPV, " events. Stop program."))
}
message(paste0("As we are working with a multiblock approach with ", length(X),
" blocks, a maximum of ", round(max_n_predictors/length(X)), " components can be used."))
max_n_predictors = round(max_n_predictors/length(X))
if(max.variables > max_n_predictors){
if(verbose){
message(paste0("As we have a low number of events, we should work with a maximum of ", max_n_predictors,
" variables/components in final cox model."))
}
max.variables.res = max_n_predictors
}
return(max.variables.res)
}
## Divide the data into 4 lists, two train and two test
## Each list has n_run lists of k_folds train/test subsets.
## n_run = 5 and k_folds = 10 -> a total of 50 models
## X must be a list of matrix
## Y must be a data.frame with "time" and "event" columns
splitData_Iterations_Folds.mb <- function(X, Y, n_run, k_folds, seed = 123){
set.seed(seed)
if(!is.numeric(n_run) & n_run > 0){
stop_quietly("Parameter 'n_run' must be a numeric greater or equal than 1.")
}
if(!is.numeric(k_folds) & k_folds > 1){
stop_quietly("Parameter 'k_folds' must be a numeric greater or equal than 2.") #At least two folds for train/test
}
if(k_folds > nrow(Y)){
warning(paste0("Parameter 'k_folds' cannot be greater than the number of observations (changed to ", nrow(Y), ").\n")) #Folds as observation as maximum
k_folds <- nrow(Y)
}
if(!any(c("event", "status") %in% colnames(Y))){
stop_quietly("Y data.frame must contain the colname 'event' or 'status'.")
}else if("status" %in% colnames(Y)){
colnames(Y)[colnames(Y)=="status"] <- "event"
}
lst_X_train <- list()
lst_Y_train <- list()
lst_X_test <- list()
lst_Y_test <- list()
lst_obs_index_train <- list()
lst_obs_index_test <- list()
for(i in 1:n_run){
if(length(unique(Y[,"event"]))==1){
testIndex <- caret::createFolds(y = Y[,"time"],
k = k_folds,
list = TRUE)
}else{
testIndex <- caret::createFolds(y = Y[,"event"],
k = k_folds,
list = TRUE)
}
#for each fold, take the others as train
lst_X_data_train_aux <- purrr::map(X, ~lapply(testIndex, function(ind, dat) dat[-ind,], dat = .))
lst_Y_data_train <- lapply(testIndex, function(ind, dat) dat[-ind,], dat = Y)
#for each fold, take just one as test
lst_X_data_test_aux <- purrr::map(X, ~lapply(testIndex, function(ind, dat) dat[ind,], dat = .))
lst_Y_data_test <- lapply(testIndex, function(ind, dat) dat[ind,], dat = Y)
#### CHANGE ORDER
lst_X_data_train <- list()
lst_X_data_test <- list()
for(f in names(lst_X_data_train_aux[[1]])){
lst_X_data_train[[f]] <- list()
lst_X_data_test[[f]] <- list()
for(b in names(X)){
lst_X_data_train[[f]][[b]] <- lst_X_data_train_aux[[b]][[f]]
lst_X_data_test[[f]][[b]] <- lst_X_data_test_aux[[b]][[f]]
}
}
lst_X_train[[i]] <- lst_X_data_train
lst_Y_train[[i]] <- lst_Y_data_train
lst_X_test[[i]] <- lst_X_data_test
lst_Y_test[[i]] <- lst_Y_data_test
lst_obs_index_test[[i]] <- testIndex
aux <- 1:nrow(Y)
lst_obs_index_train[[i]] <- lapply(testIndex, function(ind, aux) aux[-ind], aux = aux)
}
names(lst_X_train) <- paste0("run", 1:n_run)
names(lst_Y_train) <- paste0("run", 1:n_run)
names(lst_X_test) <- paste0("run", 1:n_run)
names(lst_Y_test) <- paste0("run", 1:n_run)
names(lst_obs_index_train) <- paste0("run", 1:n_run)
names(lst_obs_index_test) <- paste0("run", 1:n_run)
return(list(lst_X_train = lst_X_train, lst_Y_train = lst_Y_train, lst_X_test = lst_X_test, lst_Y_test = lst_Y_test, lst_train_index = lst_obs_index_train, lst_test_index = lst_obs_index_test, k_folds = k_folds))
}
XY.mb.scale <- function(X, Y, x.center, x.scale, y.center, y.scale){
xmeans <- NULL
xsds <- NULL
ymeans <- NULL
ysds <- NULL
#CENTERING X
if(length(x.center)>1){
if(length(x.center)==length(X) & all(names(X) %in% names(x.center))){
Xh <- purrr::map2(.x = X, .y = names(X), ~scale(., center = x.center[[.y]], scale = FALSE))
}else{
stop("Names in x.center do not match with names in X.")
}
}else{
Xh <- purrr::map(X, ~scale(., center = x.center, scale = FALSE))
}
#SCALING X
if(length(x.scale)>1){
if(length(x.scale)==length(X) & all(names(X) %in% names(x.scale))){
Xh <- purrr::map2(.x = Xh, .y = names(X), ~scale(., center = FALSE, scale = x.scale[[.y]]))
}else{
stop("Names in x.scale do not match with names in X.")
}
}else{
Xh <- purrr::map(Xh, ~scale(., center = FALSE, scale = x.scale))
}
xmeans <- purrr::map(Xh, ~attr(., "scaled:center"))
xsds <- purrr::map(Xh, ~attr(., "scaled:scale"))
#SCALING Y
if(y.center | y.scale){
Yh_time <- scale(Y[,"time", drop = FALSE], center = y.center, scale = y.scale)
ymeans <- attr(Y, "scaled:center")
ysds <- attr(Y, "scaled:scale")
Yh <- Y
Yh[,"time"] <- Yh_time
}else{
Yh <- Y
}
return(list(Xh = Xh, xmeans = xmeans, xsds = xsds, Yh = Yh, ymeans = ymeans, ysds = ysds))
}
Xh_XXNA <- function(Xh, XXNA, value = 0){
for(omic in names(Xh)){
Xh[[omic]][XXNA[[omic]]] <- value
}
return(Xh)
}
getCIndex_AUC_CoxModel_block.spls <- function(Xh, DR_coxph_ori, Yh, n.comp, keepX, scale = FALSE,
near.zero.var = FALSE, EVAL_EVALUATOR = "cenROC",
max.iter = 200, verbose = FALSE, times = NULL,
max_time_points = 15){
model <- mixOmics::block.spls(X = Xh, Y = DR_coxph_ori, ncomp = n.comp, keepX = keepX, scale = scale, near.zero.var = near.zero.var, max.iter = max.iter)
tt_mbsplsDR = model$variates[names(Xh)]
d <- as.data.frame(tt_mbsplsDR[[1]][,,drop = FALSE])
for(b in names(Xh)[2:length(Xh)]){
d <- cbind(d, as.data.frame(tt_mbsplsDR[[b]][,,drop = FALSE]))
}
colnames(d) <- apply(expand.grid(colnames(tt_mbsplsDR[[1]]), names(Xh)), 1, paste, collapse="_")
cox_model <- NULL
cox_model$fit <- tryCatch(
# Specifying expression
expr = {
survival::coxph(formula = survival::Surv(time,event) ~ .,
data = cbind(d, Yh),
ties = "efron",
singular.ok = TRUE,
robust = TRUE,
nocenter = rep(1, ncol(d)),
model = TRUE, x = TRUE)
},
# Specifying error message
error = function(e){
message(paste0("mb_splsdrcox_dynamic: ",conditionMessage(e)))
return(NA)
}
)
lp <- getLinealPredictors(cox = cox_model$fit, data = d)
if(is.null(times)){
times <- getTimesVector(Yh, max_time_points)
}
#C-index and AUC
lst_AUC_values <- getAUC_from_LP_2.0(linear.predictors = lp, Y = Yh, times = times, bestModel = NULL, eval = "mean", method = EVAL_EVALUATOR, PARALLEL = FALSE, verbose = verbose)
#BRIER
#lst_BRIER_values <- survAUC_BRIER_LP(lp = lp$fit, Y = Yh, lp_new = lp$fit, Y_test = Yh)
lst_BRIER_values <- SURVCOMP_BRIER_LP(lp_train = lp$fit, Y_train = Yh, lp_test = lp$fit, Y_test = Yh)
return(list("C.Index" = cox_model$fit$concordance["concordance"], "AUC" = lst_AUC_values$AUC, "IBS" = lst_BRIER_values$ierror))
}
getCIndex_AUC_CoxModel_block.splsda <- function(Xh, Yh, n.comp, keepX, scale = FALSE,
near.zero.var = FALSE,
EVAL_EVALUATOR = "cenROC", max.iter = 200,
verbose = verbose, times = NULL,
max_time_points = 15){
model <- mixOmics::block.splsda(X = Xh, Y = Yh[,"event"], ncomp = n.comp, keepX = keepX, scale = scale, near.zero.var = near.zero.var, max.iter = max.iter)
tt_mbsplsDR = model$variates[names(Xh)]
d <- as.data.frame(tt_mbsplsDR[[1]][,,drop = FALSE])
for(b in names(Xh)[2:length(Xh)]){
d <- cbind(d, as.data.frame(tt_mbsplsDR[[b]][,,drop = FALSE]))
}
colnames(d) <- apply(expand.grid(colnames(tt_mbsplsDR[[1]]), names(Xh)), 1, paste, collapse="_")
cox_model <- NULL
cox_model$fit <- tryCatch(
# Specifying expression
expr = {
survival::coxph(formula = survival::Surv(time,event) ~ .,
data = cbind(d, Yh),
ties = "efron",
singular.ok = TRUE,
robust = TRUE,
nocenter = rep(1, ncol(d)),
model = TRUE, x = TRUE)
},
# Specifying error message
error = function(e){
message(paste0("mb_splsda_dynamic: ",conditionMessage(e)))
return(NA)
}
)
if(is.null(times)){
times <- getTimesVector(Yh, max_time_points)
}
lp <- getLinealPredictors(cox = cox_model$fit, data = d)
#C-index and AUC
lst_AUC_values <- getAUC_from_LP_2.0(linear.predictors = lp, Y = Yh, times = times, bestModel = NULL, eval = "mean", method = EVAL_EVALUATOR, PARALLEL = FALSE, verbose = verbose)
#BRIER
#lst_BRIER_values <- survAUC_BRIER_LP(lp = lp$fit, Y = Yh, lp_new = lp$fit, Y_test = Yh)
lst_BRIER_values <- SURVCOMP_BRIER_LP(lp_train = lp$fit, Y_train = Yh, lp_test = lp$fit, Y_test = Yh)
return(list("C.Index" = cox_model$fit$concordance["concordance"], "AUC" = lst_AUC_values$AUC, "IBS" = lst_BRIER_values$ierror))
}
getVarExpModel_block.spls <- function(Xh, DR_coxph_ori, n.comp, keepX, scale = FALSE){
model <- mixOmics::block.spls(X = Xh, Y = DR_coxph_ori, ncomp = n.comp, keepX = keepX, scale = scale)
var_exp <- data.frame(lapply(model$prop_expl_var, sum))
}
getBestVectorMB <- function(Xh, DR_coxph = NULL, Yh, n.comp, max.iter, vector, MIN_AUC_INCREASE,
MIN_NVAR = 1, MAX_NVAR = NULL, cut_points = 5, EVAL_METHOD = "AUC",
EVAL_EVALUATOR = "cenROC", PARALLEL = FALSE, mode = "spls", times = NULL,
max_time_points = 15, verbose = FALSE){
if(!mode %in% c("spls", "splsda")){
stop("Mode must be one of: 'spls' or 'splsda'")
}
if(!EVAL_METHOD %in% c("AUC", "IBS", "C.Index")){
stop("Evaluation method must be one of: 'AUC', 'BRIER' or 'c_index'")
}
max_ncol <- purrr::map(Xh, ~ncol(.))
if(is.null(MAX_NVAR)){
MAX_NVAR <- max_ncol
}
if(!length(MAX_NVAR) == length(names(Xh))){
MAX_NVAR <- rep(MAX_NVAR[[1]], length(names(Xh)))
names(MAX_NVAR) <- names(Xh)
}
if(!length(MIN_NVAR) == length(names(Xh))){
MIN_NVAR <- rep(MIN_NVAR[[1]], length(names(Xh)))
names(MIN_NVAR) <- names(Xh)
}
if(is.null(vector)){
#vector <- purrr::map(names(Xh), ~c(min(MIN_NVAR, max_ncol[[.]]), (max_ncol[[.]]+min(MIN_NVAR, max_ncol[[.]]))/2, min(max_ncol[[.]], MAX_NVAR)))
vector <- purrr::map(names(Xh), ~getVectorCuts(vector = c(min(MIN_NVAR[[.]], max_ncol[[.]]):min(max_ncol[[.]], MAX_NVAR[[.]])), cut_points = cut_points, verbose = verbose))
names(vector) <- names(Xh)
}else{
#check vector is a list, and each value is less than the max.variables of that block
if(!isa(vector[1],"list")){
aux <- list()
for(b in names(Xh)){
aux[[b]] <- min(vector, dim(Xh[[b]])) #to not overpass the dimension
}
vector <- aux
message(paste0("The initial vector is: ", paste0(vector, collapse = ", ")))
}else{
if(!all(names(vector) %in% names(Xh))){
message("Your vector not contains the block names. A start vector is created using your vector.")
vector <- purrr::map(names(Xh), ~c(vector))
names(vector) <- names(Xh)
message(paste0("The initial vector is: ", paste0(vector, collapse = ", ")))
}
}
}
if(verbose){
message(paste0("Original vector: "))
message(paste0("Block ", names(vector), ": ", paste0(vector), "\n"))
}
lst_mb.spls <- list()
count = 1
var_exp = NULL
#if n_col is minimum than MIN_NVAR, values could be the same, so delete duplicates
vector <- purrr::map(vector, ~unique(.))
l <- vector
all_comb <- expand.grid(l)
## ALL POSSIBLE COMBINATIONS WITH N_VECTOR AND M_BLOCKS
## So we are trying all different number of variables per block
list_KeepX <- list()
for(i in 1:nrow(all_comb)){
keepX = list()
iter_name = NULL
for(k in names(Xh)){
keepX[[k]] = rep(all_comb[[k]][[i]], n.comp)
iter_name = c(iter_name, keepX[[k]][[1]])
}
list_KeepX[[paste0(iter_name, collapse = "_")]] <- keepX
}
if(PARALLEL){
n_cores <- future::availableCores() - 1
if(.Platform$OS.type == "unix") {
future::plan("multicore", workers = min(length(list_KeepX), n_cores))
}else{
future::plan("multisession", workers = min(length(list_KeepX), n_cores))
}
t1 <- Sys.time()
if(mode %in% "spls"){
lst_cox_value <- furrr::future_map(list_KeepX, ~getCIndex_AUC_CoxModel_block.spls(Xh = Xh, DR_coxph_ori = DR_coxph, Yh = Yh, n.comp = n.comp, keepX = ., scale = FALSE, near.zero.var = FALSE, EVAL_EVALUATOR = EVAL_EVALUATOR, max.iter = max.iter, verbose = verbose, times = times, max_time_points = max_time_points), .progress = FALSE)
}else{
lst_cox_value <- furrr::future_map(list_KeepX, ~getCIndex_AUC_CoxModel_block.splsda(Xh = Xh, Yh = Yh, n.comp = n.comp, keepX = ., scale = FALSE, near.zero.var = FALSE, EVAL_EVALUATOR = EVAL_EVALUATOR, max.iter = max.iter, verbose = verbose, times = times, max_time_points = max_time_points), .progress = FALSE)
}
t2 <- Sys.time()
future::plan("sequential")
}else{
t1 <- Sys.time()
if(mode %in% "spls"){
lst_cox_value <- purrr::map(list_KeepX, ~getCIndex_AUC_CoxModel_block.spls(Xh = Xh, DR_coxph_ori = DR_coxph, Yh = Yh, n.comp = n.comp, keepX = ., scale = FALSE, near.zero.var = FALSE, EVAL_EVALUATOR = EVAL_EVALUATOR, max.iter = max.iter, verbose = verbose, times = times, max_time_points = max_time_points), .progress = FALSE)
}else{
lst_cox_value <- purrr::map(list_KeepX, ~getCIndex_AUC_CoxModel_block.splsda(Xh = Xh, Yh = Yh, n.comp = n.comp, keepX = ., scale = FALSE, near.zero.var = FALSE, EVAL_EVALUATOR = EVAL_EVALUATOR, max.iter = max.iter, verbose = verbose, times = times, max_time_points = max_time_points), .progress = FALSE)
}
t2 <- Sys.time()
}
df_cox_value <- NULL
for(i in 1:length(lst_cox_value)){
if(EVAL_METHOD=="AUC"){
df_cox_value <- rbind(df_cox_value, lst_cox_value[[i]]$AUC)
}else if(EVAL_METHOD=="C.Index"){
df_cox_value <- rbind(df_cox_value, lst_cox_value[[i]]$C.Index)
}else if(EVAL_METHOD=="IBS"){
df_cox_value <- rbind(df_cox_value, lst_cox_value[[i]]$IBS)
}
}
if(EVAL_METHOD=="IBS"){
df_cox_value <- 1 - df_cox_value #maximize 1-brier
}
rownames(df_cox_value) <- names(list_KeepX)
index <- which.max(df_cox_value) #MAX CONCORDANCE
#Select best keepX
keepX <- list_KeepX[[index]]
FLAG = TRUE
cont = 0
best_c_index <- df_cox_value[index]
best_keepX <- keepX
if(verbose){
message(paste0("First selection: \n"), paste0(paste0("Block ", names(best_keepX), ": ", unlist(purrr::map(best_keepX, ~unique(.)))), "\n"), "Pred. Value (", EVAL_METHOD ,"): ", round(best_c_index, 4), "\n")
}
ori_vector <- vector
aux_vector <- vector
p_val <- rep(NA, length(vector))
names(p_val) <- vector
p_val <- df_cox_value[,1]
while(FLAG){
cont = cont + 1
if(verbose){
message(paste0("Iteration: ", cont, "\n"))
}
new_vector <- list()
#before_vector - always go two sides
for(b in names(best_keepX)){
aux <- best_keepX[[b]][[1]]
index <- which(aux_vector[[b]] < aux)
if(length(index)==0){
value = aux_vector[[b]][which(aux_vector[[b]] == aux)]
}else{
value = round(mean(c(aux, aux_vector[[b]][index][[length(aux_vector[[b]][index])]]))) #last smaller
}
new_vector[[b]] <- unique(c(new_vector[[b]], aux, value))
}
#next_vector - always go two sides
for(b in names(best_keepX)){
aux <- best_keepX[[b]][[1]]
index <- which(aux_vector[[b]] > aux)
if(length(index)==0){
value = aux_vector[[b]][which(aux_vector[[b]] == aux)]
}else{
value = round(mean(c(aux, aux_vector[[b]][index][[1]]))) #first greater
}
new_vector[[b]] <- unique(c(new_vector[[b]], aux, value))
}
# If two blocks and the first with only one value:
# We do not need to test the second block with the same value: clinic = 9, miRNA = 124, 105, 142 :: because 9,124 was tested
# however, if multiple blocks, clinic = 9, miRNA = 124, 105, 142, protein = 124, 105, 142 :: we want to test 124 miRNA with 105 proteomic
# so, we should keep it. Summary, if all block length 1 minus one, delete, in other case, keep. Or just do not test with the same values
# that have been already tested
lst_length <- unlist(lapply(new_vector, length))
if(sum(lst_length==1)==1){ #if just one block with multiples values
for(b in names(best_keepX)){
#first value already tested if multiple
if(length(new_vector[[b]])>1){
new_vector[[b]] <- new_vector[[b]][-1]
}
}
}
## if vector == new vector - means a custom vector has been used
if(identical(new_vector,vector)){
break
}
## sort new_vector
for(b in names(new_vector)){
new_vector[[b]] <- new_vector[[b]][order(new_vector[[b]])]
}
if(verbose){
message(paste0("Testing: \n"), paste0("Block ", names(best_keepX), ": ", new_vector, "\n"))
}
all_comb <- expand.grid(new_vector)
### update all vector
aux_vector <- purrr::map(names(new_vector), ~unique(c(aux_vector[[.]], new_vector[[.]])))
names(aux_vector) <- names(new_vector)
aux_vector <- purrr::map(names(new_vector), ~aux_vector[[.]][order(aux_vector[[.]])])
names(aux_vector) <- names(new_vector)
### OTHER KEEP_VECTOR
list_KeepX_aux <- list()
for(i in 1:nrow(all_comb)){
keepX_aux = list()
iter_name = NULL
for(k in names(Xh)){
keepX_aux[[k]] = rep(all_comb[[k]][[i]], n.comp)
iter_name = c(iter_name, keepX_aux[[k]][[1]])
}
if(paste0(iter_name, collapse = "_") %in% names(list_KeepX_aux)){
next #one already computed
}
list_KeepX_aux[[paste0(iter_name, collapse = "_")]] <- keepX_aux
}
#### ###
## remove any keepX_aux that has been already tested (when at least 2 omics with more than 2 vectors)
if(cont==1){
if(any(names(list_KeepX_aux) %in% rownames(df_cox_value))){
index_tested <- which(names(list_KeepX_aux) %in% rownames(df_cox_value))
list_KeepX_aux <- list_KeepX_aux[-index_tested]
}
}else{
if(any(names(list_KeepX_aux) %in% rownames(df_cox_value_aux))){
index_tested <- which(names(list_KeepX_aux) %in% rownames(df_cox_value_aux))
list_KeepX_aux <- list_KeepX_aux[-index_tested]
}
}
## Compute next c_index
if(PARALLEL){
n_cores <- future::availableCores() - 1
if(.Platform$OS.type == "unix") {
future::plan("multicore", workers = min(length(list_KeepX_aux), n_cores))
}else{
future::plan("multisession", workers = min(length(list_KeepX_aux), n_cores))
}
t1 <- Sys.time()
if(mode %in% "spls"){
lst_cox_value <- furrr::future_map(list_KeepX_aux, ~getCIndex_AUC_CoxModel_block.spls(Xh = Xh, DR_coxph_ori = DR_coxph, Yh = Yh, n.comp = n.comp, keepX = ., scale = FALSE, near.zero.var = FALSE, max.iter = max.iter, verbose = verbose, times = times, max_time_points = max_time_points), .progress = FALSE)
}else{
lst_cox_value <- furrr::future_map(list_KeepX_aux, ~getCIndex_AUC_CoxModel_block.splsda(Xh = Xh, Yh = Yh, n.comp = n.comp, keepX = ., scale = FALSE, near.zero.var = FALSE, max.iter = max.iter, verbose = verbose, times = times, max_time_points = max_time_points), .progress = FALSE)
}
t2 <- Sys.time()
future::plan("sequential")
}else{
t1 <- Sys.time()
if(mode %in% "spls"){
lst_cox_value <- purrr::map(list_KeepX_aux, ~getCIndex_AUC_CoxModel_block.spls(Xh = Xh, DR_coxph_ori = DR_coxph, Yh = Yh, n.comp = n.comp, keepX = ., scale = FALSE, near.zero.var = FALSE, max.iter = max.iter, verbose = verbose, times = times, max_time_points = max_time_points), .progress = FALSE)
}else{
lst_cox_value <- purrr::map(list_KeepX_aux, ~getCIndex_AUC_CoxModel_block.splsda(Xh = Xh, Yh = Yh, n.comp = n.comp, keepX = ., scale = FALSE, near.zero.var = FALSE, max.iter = max.iter, verbose = verbose, times = times, max_time_points = max_time_points), .progress = FALSE)
}
t2 <- Sys.time()
}
df_cox_value_aux <- NULL
for(i in 1:length(lst_cox_value)){
if(EVAL_METHOD=="AUC"){
df_cox_value_aux <- rbind(df_cox_value_aux, lst_cox_value[[i]]$AUC)
}else if(EVAL_METHOD=="C.Index"){
df_cox_value_aux <- rbind(df_cox_value_aux, lst_cox_value[[i]]$C.Index)
}else if(EVAL_METHOD=="IBS"){
df_cox_value_aux <- rbind(df_cox_value_aux, lst_cox_value[[i]]$IBS)
}
}
if(EVAL_METHOD=="IBS"){
df_cox_value_aux <- 1 - df_cox_value_aux #maximize 1-brier
}
rownames(df_cox_value_aux) <- names(list_KeepX_aux)
# MOVER DESPUES DE ALL_COMB para poder eleiminar aquella repetida
# we must not add the name if already exists in the list
p_val <- c(p_val, rep(NA, length(list_KeepX_aux))) #longitud es el el numero nuevo de posibles combinaciones posibles
names(p_val)[is.na(p_val)] <- names(list_KeepX_aux)
#index <- which.max(rowSums(df_cox_value_aux)) #MAX VAR_MEDIA
#index <- which.max(df_cox_value_aux[,"Y"]) #MAX Y?
index <- which.max(df_cox_value_aux) #MAX CONCORDANCE
best_c_index_aux <- df_cox_value_aux[index]
p_val[rownames(df_cox_value_aux)] <- df_cox_value_aux
if(best_c_index >= best_c_index_aux || abs(best_c_index_aux-best_c_index) <= MIN_AUC_INCREASE){
FLAG = FALSE
if(verbose){
message(paste0("End: \n"), paste0(paste0("Block ", names(best_keepX), ": ", unlist(purrr::map(best_keepX, ~unique(.)))), "\n"), paste0("Pred. Value (", EVAL_METHOD ,"): ", round(best_c_index, 4), "\n"))
}
}else{
best_c_index <- best_c_index_aux
best_keepX <- list_KeepX_aux[[index]]
if(verbose){
message(paste0("New Vector found: \n"), paste0(paste0("Block ", names(best_keepX), ": ", unlist(purrr::map(best_keepX, ~unique(.)))), "\n"), paste0("Pred. Value (", EVAL_METHOD ,"): ", round(best_c_index_aux, 4), "\n"))
}
}
}
aux_df <- t(data.frame(sapply(names(p_val), strsplit, "_")))
aux_df <- as.data.frame(aux_df)
rownames(aux_df) <- names(p_val)
colnames(aux_df) <- names(aux_vector)
for(cn in colnames(aux_df)){
aux_df[,cn] <- as.numeric(aux_df[,cn])
aux_df <- aux_df[order(aux_df[,cn]),]
}
p_val <- p_val[rownames(aux_df)]
keepX <- best_keepX
return(list(best.keepX = keepX, p_val = p_val))
}
#' getDesign.MB
#' @description Computes a new design matrix for the multi-block data by running individual PLS
#' between all omics and calculating its correlation.
#'
#' @details The `getDesign.MB` function follows the suggestion made by the mixOmics group
#' for computing design matrices for their algorithms. For more information, check
#' https://mixomicsteam.github.io/mixOmics-Vignette/id_06.html#id_06:diablo-design.
#'
#' @param Xh List of explanatory blocks.
#' @return A design matrix optimized for the X multi-omic data.
#'
#' @author Pedro Salguero Garcia. Maintainer: pedsalga@upv.edu.es
#'
#' @export
#'
#' @examples
#' data("X_multiomic")
#' X <- X_multiomic
#' design <- getDesign.MB(X)
#'
getDesign.MB <- function(Xh){
# set up a full design where every block is connected
design = matrix(1, ncol = length(Xh), nrow = length(Xh),
dimnames = list(c(names(Xh)), c(names(Xh))))
diag(design) = 0
# AUTO DESIGN - https://mixomicsteam.github.io/mixOmics-Vignette/id_06.html#id_06:diablo-design
for(i in 1:(nrow(design)-1)){
b <- rownames(design)[[i]]
for(j in (i+1):nrow(design)){
o <- rownames(design)[[j]]
if(b == o){
design[i,j] = 0
}else{
aux_pls <- mixOmics::pls(Xh[[b]], Xh[[o]], ncomp = 1)
aux_cor <- cor(aux_pls$variates$X, aux_pls$variates$Y)
aux_cor <- round(aux_cor, 1)
design[i,j] <- aux_cor
design[j,i] <- aux_cor
}
}
}
return(design)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.