Nothing
##' Fit a Fused ANOVA model
##'
##' Adjust a penalized ANOVA model with Fused-LASSO (or Total
##' Variation) penality, ie. a sum of weighted \eqn{\ell_1}{l1}-norm
##' on the difference of each coefficient. See details below.
##'
##' @param x matrix (or column vector) which rows represent
##' individuals and columns independant variables.
##'
##' @param class vector or factor giving the class of each
##' individual. If missing, \code{1:nrow(x)} is used (clustering mode
##' with one individual per class).
##'
##' @param ... list of additional parameters to overwrite the defaults of the
##' fitting procedure. Include :
##' \itemize{%
##'
##' \item{\code{weights}: } {character; which type of weights is
##' supposed to be used. The supported weights are :
##' \code{"default"}, \code{"laplace"}, \code{"gaussian"},
##' \code{"adaptive"}, \code{"naivettest"}, \code{"ttest"},
##' \code{"welch"} and \code{"personal"}. Se details below. By
##' default, its value is \code{"default"}.}
##'
##' \item{\code{gamma}: } {numeric; the \eqn{\gamma}{gamma} parameter needed for
##' \code{"laplace"}, \code{"gaussian"} and \code{"adaptive"} weights. By default, 0.}
##'
##' \item{\code{W}: } {numeric matrix; the matrix of weights needed if the \code{"personal"}
##' weights were selected. By default, a matrix with zero row and zero column.}
##'
##' \item{\code{standardize}: }{ logical; should each variable be standardized before the calculus ?
##' By default, \code{TRUE}.
##' }
##'
##' \item{\code{splits}: }{ integer; coding for forcing split or nosplit algorithms :
##' \itemize{%
##'
##' \item{\code{0} : }{Default value, let the programm decide
##' which algorithm to use depending on the choosen \code{weights}.}
##'
##' \item{\code{1} : }{Forces the algorithm not to take the splits into account.}
##'
##' \item{\code{2} : }{Forces the algorithm to take the splits into account even if
##' the solution paths contains no split.}
##' }
##' Note : For the moment, only the no split algorithm has been coded. Please ensure that
##' your weights choice leads to the no split algorithm or set \code{split} to \code{1}.
##' }
##'
##' \item{\code{epsilon}: }{numeric; tolerance parameter for numeric calculations.
##' By default, \eqn{10^-10}{eps}.
##' Note : this is currently not used.}
##'
##' \item{\code{checkargs}: }{logical; should arguments be checked to
##' (hopefully) avoid internal crashes? Default is \code{TRUE}.
##' Automatically set to \code{FALSE} when a call is made
##' from cross-validation}
##'
##' \item{\code{lambdalist}: }{numeric vector; a set of \eqn{\lambda}{lambda} value for which
##' a prediction is asked. By default, a null vector. If the length of \code{lambdalist}
##' is not \code{0}, the \code{fusedanova} class returnded by the \code{fusedanova} function will
##' have a not null attribute \code{prediction}.
##' }
##'
##' \item{\code{mc.cores}: } {integer; the number of cores to use. The default uses all
##' the cores available. }
##'
##' \item{\code{verbose}: } {boolean; should the code print out its progress.
##' By default, FALSE. }
##'
##' \item{\code{mxSplitSize}: } {integer; the maximum size for a group for being checked
## for eventual splitting. By default, 100.
##' the cores available. }
##'
##' }
##'
##' @return an object with class \code{fusedanova}, see the
##' documentation page \code{\linkS4class{fusedanova}} for details.
##'
##' The optimization problem solved by fused-ANOVA is
##' \if{latex}{\deqn{%
##' \hat{\beta}_{\lambda} = \arg \min_{\beta}
##' \left\{\sum_{k=1}^K \sum_{i=1}^{n_k} \left(Y_{ik}-\beta_k \right)^2
##' + \lambda \sum_{k,\ell} w_{kl} \left|\beta_k - \beta_\ell \right|\right\}}}
##' \if{html}{\out{ <center> β<sup>hat</sup>
##' <sub>λ<sub>1</sub></sub> =
##' argmin<sub>β</sub> sum<sub>k</sub> sum_i (Y<sub>ik</sub> - &beta<sub>k</sub>)<sup>2</sup>
##' + λ sum<sub>k,l</sub> w<sub>k,l</sub>
##' | β<sub>k</sub> - β<sub>l</sub> |, </center> }}
##' \if{text}{\deqn{beta.hat(lambda) = argmin_beta sum_k sum_i (Y_ik - beta_k)^2
##' + lambda sum_k sum_l w_kl | beta_k - beta_l|,}}
##'
##' where \eqn{Y_{ik}}{Y_ik} is the intensity of a continuous random
##' variable for sample \eqn{i}{i} in condition \eqn{k}{k} and
##' \eqn{\beta_k}{beta_k} is the mean parameter of condition
##' \eqn{k}{k}. We denote by \eqn{K}{K} the total number of conditions
##' and \eqn{n_k}{n_k} the number of sample in each condition.
##'
##' More details related to the weights are coming...
##'
##' @seealso See also \code{\linkS4class{fusedanova}},
##' \code{\link{plot,fusedanova-method}} and \code{\link{cv.fa}}.
##' @name fusedanova
##' @rdname fusedanova
##' @keywords models, regression
##'
##' @examples \dontrun{
##' data(aves)
##' fa.laplace <- fusedanova(x=aves$weight, class=aves$family, weights="laplace", gamma=5)
##' plot(fa.laplace, labels=aves$order)
##'
##' fa.ttest <- fusedanova(x=aves$weight, class=aves$family, weights="naivettest")
##' plot(fa.ttest, labels=aves$order)
##'
##' fa.ada <- fusedanova(x=aves$weight, class=aves$family, weights="adaptive", gamma=2)
##' plot(fa.ada, labels=aves$order)
##' }
##'
##' @export
fusedanova <- function(x, class,...) {
## ===================================================
## GETTING DEFAULT PARAMS
user <- list(...)
defs <- default.args.fa()
args <- modifyList(defs, user)
## ===================================================
## CHECKS TO (PARTIALLY) AVOID CRASHES OF THE C++ CODE
if(!is.matrix(x))
x <- as.matrix(x)
p <- ncol(x) # problem size
n <- nrow(x) # sample size
if (missing(class))
class <- factor(1:n)
if (args$checkargs) {
if(any(is.na(x)))
stop("NA value in x not allowed.")
if(n != length(class))
stop("x and y have not correct dimensions")
if(length(unique(class))==1)
stop("y has only one level.")
if(!(args$weights %in% possibleWeights))
stop("Unknown weight parameter formulation. Aborting.")
if (Sys.info()[['sysname']] == "Windows") {
if(args$verbose){
warning("\nWindows does not support fork, enforcing mc.cores to '1'.")
}
args$mc.cores <- 1
}
if (length(args$lambdalist)!=0){ # sort by descending for effective prediction
args$lambdalist = sort(args$lambdalist, decreasing = TRUE)
}
}
## ====================================================
## CONVERSIONS OF x in a list
x <- split(x, rep(1:p, each = n))
if (!is.factor(class)){class <- as.factor(class)}
## ======================================================
## NORMALIZATION TREATMENT
if (args$standardize==TRUE){x=lapply(x,function(z){normalize(z,class)})} # centrer réduire
## ====================================================
## SPLIT OCCURENCE TESTING AND LAUNCH
if (args$splits==0){ # if we let the programm choose, overwrite the value
if (args$weights=="default"||(args$weights %in% c("laplace","gaussian","adaptive") && args$gamma>=0) ||length(unique(class))<3){
args$splits <- 1
}else{
args$split <- 2
}
}
if (args$split==1){
algoType <- "No Split"
if(args$verbose){cat("\nPath calculated without split")}
} else {
algoType <- "With possible Splits"
if(args$verbose){cat("\nPath calculated with possible splits")}
}
if (p > 1) {
result <- mclapply(x,function(z) {calculatepath(z,class,args)},
mc.cores= args$mc.cores, mc.preschedule=ifelse(p > 100,TRUE,FALSE)) # path algorithm
} else {
result <- list(calculatepath(x[[1]],class,args))
}
if (length(args$lambdalist) == 0){ # default parameter
l <- numeric(0)
prediction = list()
} else{
l <- args$lambdalist
prediction <- lapply(result, function(z){list(table=z$prediction, order = z$order)})
}
result <- lapply(result, function(z){list(table=z$table, order = z$order)})
## small warning on last beta
if (args$standardize==TRUE && abs(result[[1]]$table[1,1])>10^(-8)){
warning("There may be some approximation errors (Beta(lambda_max) far from 0). You may want to lower the gamma if you are using one.")}
return(new("fusedanova",result=result, prediction=prediction, classes = class, weights=args$weights, lambdalist=l,algorithm=algoType))
}
#########################################
# Calculate path with or without splits #
#########################################
# calculate the path for one gene
# x the vector of data, group the vector group belonging, args all the rest
calculatepath <- function(x, group, args){
ngroup <- tabulate(group)# vector of number by group
xm <- rowsum(x,group)/ngroup
xv <- rep(0,length(xm))
if (args$weights %in% varNeededWeights){
## var needed if weights are of welch or ttest type
xv <- ngroup/(ngroup-1)*(rowsum(x^2,group)/ngroup - xm^2)
}
ordre <- order(xm)
xm <- xm[ordre] # sort from the smallest beta to the highest
ngroup <- ngroup[ordre]
xv <- xv[ordre]
if (args$splits==1){
res <- .Call("noSplit",R_x=xm,R_xv=xv,R_ngroup=ngroup, R_args=args, PACKAGE="fusedanova")
}else{
res <- .Call("withSplit",R_x=xm,R_xv=xv,R_ngroup=ngroup, R_args=args, PACKAGE="fusedanova")
}
return(list(table=res$res, prediction=res$pred, order=ordre))
}
#############################
# normalization of a vector
#############################
normalize <- function(x,group){
Pooled = (nlevels(group)!= length(x))
if (Pooled == FALSE){
res = (x-mean(x))/as.numeric((sqrt(var(x))))
} else {
n <- length(x)
m <- mean(x)
ngroup <- tabulate(group)
s <- 1/(ngroup-1)*(rowsum(x^2,group) - (1/ngroup)*(rowsum(x,group))^2)
s[which(ngroup==1)] <- 0
if (sum(s)==0){
res <- (x-mean(x))/as.numeric((sqrt(var(x))))
} else{
s <- sqrt(sum(s*(ngroup-1))/(n-length(ngroup)))
res <- (x-m)/s
}
}
return(res)
}
#############################
# default args
#############################
default.args.fa <- function() {
return(list(
weights = "default",
W = matrix(nrow=0, ncol=0),
gamma = 0 ,
standardize =TRUE,
splits = 0,
epsilon =10^-10,
checkargs = TRUE,
lambdalist = numeric(0),
mc.cores = detectCores(),
verbose = FALSE,
mxSplitSize = 100
))
}
## constant list of treated weights
possibleWeights = c("default","laplace","gaussian","adaptive","naivettest","ttest","welch","personal")
varNeededWeights = c("welch", "ttest")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.