# ##' Cross-validation function for the fusedanova method.
# ##'
# ##' Computes the K-fold cross-validated error of a
# ##' \code{fusedanova} fit. Optionally, the whole K-fold CV can be
# ##' performed V times and averaged, reducing the CV standard error
# ##' around the minimum.
# ##'
# ##' @param x matrix whose rows represent individuals and columns
# ##' independent variables.
# ##'
# ##' @param class vector or factor giving the class of each individual.
# ##'
# ##' @param K integer indicating the number of folds. Default is 10.
# ##'
# ##' @param V integer indicating the number of times the K-fold CV
# ##' will be repeated and averaged. Default is 1 (no averaging).
# ##'
# ##' @param verbose boolean for verbose mode. Default is \code{TRUE}.
# ##'
# ##' @param folds list of \code{K} vectors describing the folds to
# ##' use for the cross-validation. By default, the folds are randomly
# ##' sampled with the specified K. The same folds are used for every
# ##' value of \code{lambdalist}.
# ##'
# ##' @param lambdalist vector of \code{lambda} penalty parameters to use
# ##' in the cross-validation. By default, \code{lambdalist} is NULL and is
# ##' computed from the maximum \code{lambda} and the parameter \code{nlambda}.
# ##'
# ##' @param ... list of additional parameters overwriting the defaults of the
# ##' fitting procedure. See the corresponding documentation (\code{\link{fusedanova}}).
# ##' It also includes:
# ##' \itemize{%
# ##'
# ##' \item{\code{nlambda}: }{integer; the length
# ##' of the \code{lambdalist} vector, by default 100.}
# ##'
# ##' \item{\code{log.scale}: }{boolean; should a logarithmic scale be used
# ##' when creating \code{lambdalist}? By default, FALSE.}
# ##'
# ##' \item{\code{min.ratio}: }{numeric parameter setting the smallest value of
# ##' \code{lambdalist} in the \code{log.scale} case, namely
# ##' \code{min.ratio*lambdamax}.}
# ##' }
# ##'
# ##' @return An object of class "cv.fa" for which a \code{plot} method
# ##' is available.
# ##'
# ##' @seealso \code{\linkS4class{fusedanova}}, \code{\link{plot,cv.fa-method}}
# ##' and \code{\linkS4class{cv.fa}}.
# ##'
# ##' @examples \dontrun{
# ##' data(aves)
# ##' cv.out <- cv.fa(aves$weight, aves$family)
# ##' V50.cv.out <- cv.fa(aves$weight, aves$family, V=50)
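# ##' ## hypothetical call illustrating the lambda-grid options passed
# ##' ## through '...' (nlambda, log.scale, min.ratio documented above)
# ##' grid.cv.out <- cv.fa(aves$weight, aves$family, nlambda=50,
# ##'                      log.scale=TRUE, min.ratio=1e-3)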
# ##' }
# ##'
# ##' @keywords models regression
# ##' @name cv.fa
# ##' @aliases cv.fa
# ##' @rdname cv.fa
# ##'
# ##' @export
# cv.fa <- function(x,
# class,
# K = 10,
# folds= split(sample(1:length(class)), rep(1:K, length=length(class))),
# lambdalist = NULL,
# V = 1,
# verbose = TRUE,
# ...) {
#
# ## =============================================================
# ## INITIALIZATION & PARAMETERS RECOVERY
#
# user <- list(...)
# defs <- default.args.cv()
# args <- modifyList(defs, user)
#
# if (Sys.info()[['sysname']] == "Windows") {
# args$mc.cores <- 1 # Windows does not support fork
# }
#
# if(!is.matrix(x))
# x <- as.matrix(x)
# n <- nrow(x)
# p <- ncol(x)
# if (missing(class))
# class <- 1:n
#
# if (args$checkargs) {
# if(any(is.na(x)))
# stop("NA value in x not allowed.")
# if(n != length(class))
# stop("x and class do not have matching dimensions.")
# if(length(unique(class))==1)
# stop("class has only one level.")
# if(!(args$weights %in% c("default","laplace","gaussian","adaptive","naivettest","ttest","welch","personal")))
# stop("Unknown weight parameter formulation. Aborting.")
# if (Sys.info()[['sysname']] == "Windows") {
# if(verbose){
# warning("\nWindows does not support fork, enforcing mc.cores to '1'.")
# }
# }
# args$checkargs = FALSE # avoid re-checking arguments in the first fusedanova call
# }
#
# ## =============================================================
# # FIRST RUN ON THE WHOLE DATASET
# # get the lambda max and a fusedanova object on which predict can be used later
#
# firstrun = do.call(fusedanova,c(list(x=x,class=class),args))
# lambdamax = max(unlist(lapply(firstrun@result,function(x){max(x$table[,"lambda"])})))
#
# # generating the grid of lambda by ascending order
# if (is.null(lambdalist)) {
# if (!args$log.scale){
# args$lambdalist = seq(0,lambdamax,len=args$nlambda)
# } else {
# args$lambdalist = 10 ^ seq(log10(args$min.ratio*lambdamax),log10(lambdamax), len=args$nlambda)
# }
# } else {
# args$lambdalist = sort(lambdalist,decreasing=FALSE)
# }
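# # e.g. with lambdamax = 10, nlambda = 5, log.scale = TRUE and min.ratio = 1e-2,
# # the grid is 10^seq(log10(0.1), log10(10), len=5) = 0.1, 0.316, 1, 3.16, 10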
#
# # overwrite nlambda and K
# args$nlambda = length(args$lambdalist)
# K <- length(folds)
#
# # split x into a list of columns and coerce class to a factor
# x = split(x, rep(1:p, each = n))
# if(!is.factor(class)){class=as.factor(class)}
#
# ## ====================================================
# ## SPLIT OCCURRENCE TESTING AND LAUNCH
# if (args$splits==0){ # if the program is left to choose, overwrite the value
# if (args$weights=="default"||(args$weights %in% c("laplace","gaussian","adaptive") && args$gamma>=0) ||length(unique(class))<3){
# args$splits = 1
# }else{
# args$splits = 2
# }
# }
#
# one.fold <- function(k,z) {
# omit <- folds[[k]]
# if (args$standardize){z = normalize.cv(x=z,group=class,omit=omit)}
# err <- simplecv(xtrain=z[-omit], ytrain=class[-omit], xtest=z[omit], ytest=class[omit], args=args)
# return(err)
# }
#
# if (verbose) {
# if (V > 1) {
# cat("\nAveraging ", V, " ", K, "-fold cross-validations... might take a while!", sep="")
# cat("\nV =")
# } else {
# cat("\n", K, "-fold cross-validation...", sep="")
# }
# }
#
# cv.list <- list()
# global.list <- list()
# folds.list <- list()
# for (v in 1:V) {
# if (V > 1 && verbose) {
# cat(" ", v)
# }
#
# folds.list[[v]] <- folds
# ## Errors is a list of tables (rows <-> folds, columns <-> lambdas), one per
# ## variable; folds are run in parallel via parallel::mclapply
# Errors <- lapply(x,function(z){
# err <- do.call(rbind,mclapply(1:K, function(k){one.fold(k,z)}, mc.cores= args$mc.cores,
# mc.preschedule = (K > 10)))
# return(err)
# })
#
# # CV contains the mean error over folds and its standard error.
# CV <- lapply(Errors,function(err){
# err2 = err^2
# meanerr <- colMeans(err)
# meanerr2 <- colMeans(err2)
# meanerr2 <- sqrt(1/(K-1)*(meanerr2 - meanerr^2)) # standard error of the mean
# lambda.min = max(args$lambdalist[meanerr <= min(meanerr)])
# # if several lambdas reach the minimum, keep the largest one
# return(list(cv.error = data.frame(err=meanerr,sd=meanerr2,lambdalist = args$lambdalist),
# lambda.min=lambda.min))
# })
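# # Note on the standard error above: with s^2 = K/(K-1)*(mean(err^2)-mean(err)^2)
# # the sample variance across the K folds, the standard error of the mean is
# # sqrt(s^2/K) = sqrt((mean(err^2)-mean(err)^2)/(K-1)), i.e. the formula used.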
#
# if (p>1){
# err = aaply(laply(Errors,as.matrix),c(2,3),mean) # mean over all variables, per fold and lambda (plyr)
# err2 = err^2
# meanerr <- colMeans(err)
# meanerr2 <- colMeans(err2)
# meanerr2 <- sqrt(1/(K-1)*(meanerr2 - meanerr^2))
# lambda.min = max(args$lambdalist[meanerr <= min(meanerr)])
# global = list(cv.error = data.frame(err=meanerr,sd=meanerr2,
# lambdalist = args$lambdalist),lambda.min=lambda.min)
# }else{
# global=CV[[1]]
# }
# global.list[[v]] <- global
# cv.list[[v]] <- CV
#
# ## draw new folds for the next repetition
# folds <- split(sample(1:length(class)), rep(1:K, length=length(class)))
# }
#
# cv <- as.data.frame(apply(sapply(global.list, function(l) { l$cv.error}), 1, simplify2array))
#
# lb <- mean(sapply(global.list, function(l) { l$lambda.min}))
#
# cv <- data.frame(err=tapply(cv$err, cv$lambdalist, mean),
# sd =tapply(cv$sd, cv$lambdalist, function(x) sqrt(mean(x^2)/V)),
# lambdalist = args$lambdalist)
# rownames(cv) <- 1:nrow(cv)
# global <- list(cv.error= cv, lambda.min=lb)
#
# CV <- lapply(1:p, function(i) {
# cv <- as.data.frame(apply(sapply(cv.list, function(l) { l[[i]]$cv.error}), 1, simplify2array))
# lb <- mean(sapply(cv.list, function(l) { l[[i]]$lambda.min}))
# cv <- data.frame(err=tapply(cv$err, cv$lambdalist, mean),
# sd =tapply(cv$sd, cv$lambdalist, function(x) sqrt(mean(x^2)/V)),
# lambdalist = args$lambdalist)
# rownames(cv) <- 1:nrow(cv)
# return(list(cv.error= cv, lambda.min=lb))
# })
#
# return(new("cv.fa",
# byvariable = CV,
# global = global,
# folds = ifelse(V == 1, folds, folds.list),
# lambdalist =args$lambdalist,
# algorithm = algoType))
#
# }
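#
# # Typical use of the returned "cv.fa" object (a sketch based on the slots
# # assigned above; the averaged CV curve lives in the 'global' slot):
# # cv.out <- cv.fa(x, class)
# # cv.out@global$lambda.min # lambda minimizing the averaged CV error
# # cv.out@global$cv.error # data.frame with columns err, sd and lambdalist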
#
# # returns the cross-validated prediction error for one fold
# simplecv <- function(xtrain,ytrain,xtest,ytest,args){
#
# # Objective: xm and xmtest must have the same length for the C++ code.
# # ytest is coerced to a factor carrying the levels of ytrain before tapply;
# # if a level is absent from ytest, the resulting NA is replaced by 0 so the
# # error computation remains correct.
# # A level present in ytest but not in ytrain is discarded.
# ytrain = factor(ytrain) # drop unused factor levels
#
# index = ytest %in% ytrain # what to discard.
# ytest = factor(ytest[index],levels=levels(ytrain)) # discard but keep the ytrain levels
# xtest = xtest[index] # same
#
# ngroup = tapply(ytrain,ytrain,length) # group sizes in the training fold
# xm = tapply(xtrain,ytrain,mean)
# xv = rep(0,length(xm))
# if (args$weights %in% c("welch", "naivettest", "ttest")){ # variances needed for welch/ttest-type weights
# xv = tapply(xtrain,ytrain,var)
# xv[is.na(xv)] <- 0
# }
#
# ngrouptest = tapply(ytest,ytest,length) # group sizes in the test fold
# ngrouptest[is.na(ngrouptest)]=0
#
# xmtest = tapply(xtest, ytest, mean)
# xmtest[is.na(xmtest)] <- 0
#
# xvtest = tapply(xtest,ytest,var)*(ngrouptest-1)
# xvtest[is.na(xvtest)] <- 0
#
# # sort from the smallest beta (group mean) to the largest
# ordre = order(xm)
# xm = xm[ordre]
# ngroup =ngroup[ordre]
# ngrouptest = ngrouptest[ordre]
# xv =xv[ordre]
# xmtest = xmtest[ordre] # TODO: would a data frame be faster?
#
# # decomposition of the error (Huygens):
# # sum_i (hatY_i(lambda) - Y_i)^2 = sum_k ngrouptest(k)*(hatY_k - mean(Y_i in k))^2
# #                                  + sum_k sum_{i in k} (Y_i - mean(Y_i in k))^2
# # the within-group term is errVar below; the first term is computed in the C++ call
# errVar = sum(xvtest)
#
# errEst <- .Call("noSplitcv",R_x=xm,R_xv=xv, R_ngroup=ngroup, R_xtest =xmtest, R_ngrouptest=ngrouptest, R_args=args, PACKAGE="fusedanova")
#
# errEst = errEst + errVar
#
# return(errEst)
# }
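#
# # Minimal sketch (toy data, not package code) of the level-alignment trick
# # described at the top of simplecv: means are computed against the training
# # levels so that train and test vectors align, with 0 for absent levels.
# # ytrain <- factor(c("a","a","b","c"))
# # ytest <- factor("c", levels = levels(ytrain))
# # tapply(c(1,3,5,7), ytrain, mean) # a=2, b=5, c=7
# # m <- tapply(6, ytest, mean) # a=NA, b=NA, c=6
# # m[is.na(m)] <- 0 # a=0, b=0, c=6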
#
# ###################################
# # normalization of a vector for cv
# ###################################
# normalize.cv <- function(x,group,omit){
# xtrain = x[-omit]
# grouptrain = group[-omit]
# Pooled = (nlevels(grouptrain)!= length(xtrain))
# if (!Pooled){
# res = (x-mean(xtrain))/as.numeric((sqrt(var(xtrain))))
# }else{
# ntrain = length(xtrain)
# m = mean(xtrain)
# ngrouptrain = tabulate(grouptrain)
# ngrouptrain = ngrouptrain[ngrouptrain != 0]
# s = rowsum(xtrain^2,grouptrain) - (1/ngrouptrain)*(rowsum(xtrain,grouptrain))^2
# s[which(ngrouptrain==1)]=0
# if (sum(s)==0){
# res = (x-mean(xtrain))/as.numeric((sqrt(var(xtrain))))
# }else{
# s = sqrt(sum(s)/(ntrain-length(ngrouptrain)))
# res = (x-m)/s
# }
# }
# return(res)
# }
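#
# # Toy illustration (not package code): standardize a vector using training-fold
# # statistics only, here omitting observations 5 and 6 as the held-out fold.
# # x <- c(1.2, 0.8, 2.1, 1.9, 1.5, 0.7)
# # g <- factor(c("a","a","b","b","a","b"))
# # normalize.cv(x, g, omit = c(5,6)) # centered on the train mean, scaled by the pooled within-group sd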
#
#