R/cv.alfareg.R

Defines functions cv.alfareg

Documented in cv.alfareg

################################
#### Tuning the alfa in alfa-regression via K-fold cross validation
#### Tibshirani and Tibshirani method
#### Tsagris Michail 11/2015
#### mtsagris@yahoo.gr
#### References: Tsagris Michail (2015)
#### Regression analysis with compositional data containing zero values
#### Chilean Journal of Statistics, 6(2): 47-57
################################
cv.alfareg <- function( y, x, a = seq(0.1, 1, by = 0.1), nfolds = 10, folds = NULL, nc = 1, seed = NULL ) {
  ## y is the compositional data (dependent variable)
  ## x is the independent variables
  ## a is a range of values of alpha
  ## nfolds is the number of folds for the K-fold cross validation
  ## nc is how many cores you want to use, default value is 2
  if ( min(y) == 0 )  a <- a[a>0]
  la <- length(a)
  n <- dim(y)[1]
  ina <- 1:n
  x <- model.matrix( y~., data=as.data.frame(x) )
  if ( is.null(folds) )  folds <- Compositional::makefolds(ina, nfolds = nfolds,
                                                           stratified = FALSE, seed = seed)
  nfolds <- length(folds)

  if (nc <= 1) {
    apa <- proc.time()
    kula <- matrix(nrow = nfolds, ncol = la)
    for (j in 1:la) {
      ytr <- Compositional::alfa(y, a[j])$aff
      for ( i in 1:nfolds ) {
        xtest <- x[ folds[[ i ]], -1, drop = FALSE]
        ytest <- y[ folds[[ i ]], ]
        xtrain <- x[ -folds[[ i ]], -1]
        ytrain <- y[-folds[[ i ]], ]
        yb <- ytr[ -folds[[ i ]], ]
        yest <- CompositionalSR::areg(ytrain, xtrain, a[j], xnew = xtest, yb = yb)$est
        kl <- ytest * log(ytest / yest)
        kl[ is.infinite(kl) ] <- NA
        kula[i, j] <- sum(kl, na.rm = TRUE) / dim(yest)[1]
      }
    }

    kl <- Rfast::colmeans(kula)
    opt <- c( min(kl), a[ which.min(kl) ] )
    names(opt) <- c( "KLD", "alpha")
    apa <- proc.time() - apa

  } else {
    apa <- proc.time()
    val <- matrix(a, ncol = nc) ## if the length of a is not equal to the
    ## dimensions of the matrix val a warning message should appear
    requireNamespace("doParallel", quietly = TRUE, warn.conflicts = FALSE)
    cl <- parallel::makePSOCKcluster(nc)
    doParallel::registerDoParallel(cl)
    if ( is.null(folds) )  folds <- Compositional::makefolds(ina, nfolds = nfolds,
                                                             stratified = FALSE, seed = seed)
    kula <- foreach::foreach(j = 1:nc, .combine = cbind, .packages = "Rfast", .export = c("areg",
            "alfa", "helm", "comp.reg", "multivreg", "rowsums", "colmeans", "colVars") ) %dopar% {
            ba <- val[, j]
            ww <- matrix(nrow = nfolds, ncol = length(ba) )
            for ( l in 1:length(ba) ) {
              ytr <- Compositional::alfa(y, ba[l])$aff
              for (i in 1:nfolds) {
                xtest <- x[ folds[[ i ]], -1, drop = FALSE]
                ytest <- y[ folds[[ i ]], ]
                xtrain <- x[ -folds[[ i ]], -1]
                ytrain <- y[-folds[[ i ]], ]
                yb <- ytr[ -folds[[ i ]], ]
                yest <- CompositionalSR::areg(ytrain, xtrain, a[j], xnew = xtest, yb = yb)$est
                kl <- ytest * log(ytest / yest)
                kl[ is.infinite(kl) ] <- NA
                ww[i, l] <- sum(kl, na.rm = TRUE) / dim(yest)[1]

              }
            }
           return(ww)
    }
    parallel::stopCluster(cl)

    kula <- kula[, 1:la]
    kl <- Rfast::colmeans(kula)
    opt <- c( min(kl), a[ which.min(kl) ] )
    names(opt) <- c( "KLD", "alpha")
    apa <- proc.time() - apa

  }

  list(runtime = apa, perf = kl, opt = opt)
}

Try the CompositionalSR package in your browser

Any scripts or data that you put into this service are public.

CompositionalSR documentation built on March 28, 2026, 5:07 p.m.