Nothing
#' Run hierarchical clustering following by a group-lasso on all the different partition and a hierarchical testing procedure.
#' Only for linear regression problem.
#'
#' @title Full process of MLGL
#'
#' @author Quentin Grimonprez
#' @param X matrix of size n*p
#' @param y vector of size n.
#' @param hc output of \code{\link{hclust}} function. If not provided, \code{\link{hclust}} is run with ward.D2 method. User
#' can also provide the desired method: "single", "complete", "average", "mcquitty", "ward.D", "ward.D2", "centroid", "median".
#' @param control either "FDR" or "FWER"
#' @param alpha control level for testing procedure
#' @param test test used in the testing procedure. Default is \link{partialFtest}
#' @param fractionSampleMLGL a real between 0 and 1: the fraction of individuals to use in the sample for MLGL (see Details).
#' @param BHclust number of replicates for computing the distance matrix for the hierarchical clustering tree
#' @param nCore number of cores used for distance computation. Use all cores by default.
#' @param addRoot If TRUE, add a common root containing all the groups
#' @param Shaffer If TRUE, a Shaffer correction is performed (only if control = "FWER")
#' @param ... Others parameters for MLGL
#'
#' @return a list containing:
#' \describe{
#' \item{res}{output of \link{MLGL} function}
#' \item{lambdaOpt}{lambda values maximizing the number of rejects}
#' \item{var}{A vector containing the index of selected variables for the first \code{lambdaOpt} value}
#' \item{group}{A vector containing the values index of selected groups for the first \code{lambdaOpt} value}
#' \item{selectedGroups}{Selected groups for the first \code{lambdaOpt} value}
#' \item{reject}{Selected groups for all lambda values}
#' \item{alpha}{Control level}
#' \item{test}{Test used in the testing procedure}
#' \item{control}{"FDR" or "FWER"}
#' \item{time}{Elapsed time}
#' }
#'
#' @details
#' Divide the n individuals in two samples. Then the three following steps are done:
#' 1) Bootstrap Hierarchical Clustering of the variables of X
#' 2) MLGL on the second sample of individuals
#' 3) Hierarchical testing procedure on the first sample of individuals.
#'
#' @examples
#' # least square loss
#' set.seed(42)
#' X <- simuBlockGaussian(50, 12, 5, 0.7)
#' y <- X[, c(2, 7, 12)] %*% c(2, 2, -2) + rnorm(50, 0, 0.5)
#' res <- fullProcess(X, y)
#' @seealso \link{MLGL}, \link{hierarchicalFDR}, \link{hierarchicalFWER}, \link{selFDR}, \link{selFWER}
#'
#' @export
fullProcess <- function(X, ...) {
UseMethod("fullProcess")
}
#' @rdname fullProcess
#' @export
fullProcess.default <- function(X, y, control = c("FWER", "FDR"), alpha = 0.05, test = partialFtest,
hc = NULL, fractionSampleMLGL = 1 / 2, BHclust = 50, nCore = NULL,
addRoot = FALSE, Shaffer = FALSE, ...) {
loss <- "ls"
# if(loss == "logit" & identical(test, partialFtest))
# test = partialChisqtest
.checkFullProcess(X, y, hc, alpha, test, fractionSampleMLGL, loss)
n <- nrow(X)
# Split the data in 2
ind2 <- sample(n, floor(n * fractionSampleMLGL))
ind1 <- (1:n)[-ind2]
##### part 1: hierarchical clustering with half of the data
if (is.null(hc) | is.character(hc)) {
# center variables and sd = 1
Xb <- scale(X, center = TRUE, scale = FALSE)
Xb <- scale(Xb, center = FALSE, scale = sqrt(colSums(Xb^2) / n))
# hierarchical clustering
hc <- bootstrapHclust(Xb, frac = 1, B = BHclust, method = ifelse(is.character(hc), hc, "ward.D2"), nCore = nCore)
}
##### part 2: group-lasso
res <- MLGL(X[ind2, ], y[ind2], hc = hc, loss = loss, ...)
##### part 3: testing procedure
outTest <- HMT(res, X[ind1, ], y[ind1], control, alpha, test, addRoot, Shaffer)
outObj <- c(list(res = res), outTest)
class(outObj) <- "fullProcess"
return(outObj)
}
#' @param formula an object of class "formula" (or one that can be coerced to that class): a symbolic description of the
#' model to be fitted.
#' @param data an optional data frame, list or environment (or object coercible by as.data.frame to a data frame) containing
#' the variables in the model. If not found in data, the variables are taken from environment (formula)
#'
#'
#' @rdname fullProcess
#'
#' @export
fullProcess.formula <- function(formula, data, control = c("FWER", "FDR"), alpha = 0.05,
test = partialFtest, hc = NULL, fractionSampleMLGL = 1 / 2,
BHclust = 50, nCore = NULL, addRoot = FALSE, Shaffer = FALSE, ...) {
cl <- match.call()
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data"), names(mf), 0L)
mf <- mf[c(1L, m)]
mf$drop.unused.levels <- TRUE
mf[[1L]] <- quote(stats::model.frame)
mf <- eval(mf, parent.frame())
mt <- attr(mf, "terms")
y <- model.response(mf, "numeric")
X <- model.matrix(mt, mf)
X <- as.matrix(X[, -1])
res <- fullProcess.default(X, y, control, alpha, test, hc, fractionSampleMLGL, BHclust, nCore, addRoot, Shaffer, ...)
return(res)
}
# check parameters of MLGL function
.checkFullProcess <- function(X, y, hc, alpha, test, fractionSampleMLGL, loss) {
# check X
if (!is.matrix(X)) {
stop("X has to be a matrix.")
}
if (any(is.na(X))) {
stop("Missing values in X not allowed.")
}
if (!is.numeric(X)) {
stop("X has to be a matrix of real.")
}
# check y
if (!is.numeric(y)) {
stop("y has to be a vector of real.")
}
if (any(is.na(y))) {
stop("Missing values in y not allowed.")
}
if (loss == "logit" && any(y %in% c(-1, 1) == FALSE)) {
stop("Classification method requires the response y to be in {-1,1}")
}
# check if X and y are compatible
if (nrow(X) != length(drop(y))) {
stop("The length of y and the number of rows of X don't match.")
}
# check hc
if (!is.null(hc)) {
if (is.character(hc)) {
if (!(hc %in% c("single", "complete", "average", "mcquitty", "ward.D", "ward.D2", "centroid", "median"))) {
stop("In character mode, hc must be \"single\", \"complete\", \"average\", \"mcquitty\", \"ward.D\",
\"ward.D2\", \"centroid\" or \"median\".")
}
} else {
# check if hc is a hclust object
if (!inherits(hc, "hclust")) {
stop("hc must be an hclust object.")
}
# check if hc and X are compatible
if (length(hc$order) != ncol(X)) {
stop("hc is not a clustering of the p covariates of X.")
}
}
}
# alpha
if (length(alpha) != 1) {
stop("alpha must be a real between 0 and 1.")
}
if ((alpha <= 0) || (alpha > 1)) {
stop("alpha must be a real between 0 and 1.")
}
# check if test is a function
if (!is.function(test)) {
stop("test must be a function.")
}
# fractionSampleMLGL
if (length(fractionSampleMLGL) != 1) {
stop("fractionSampleMLGL must be a real between 0 and 1.")
}
if ((fractionSampleMLGL <= 0) || (fractionSampleMLGL >= 1)) {
stop("fractionSampleMLGL must be a real between 0 and 1.")
}
invisible(return(NULL))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.