R/add.r


#'
#' Simulate data with normally distributed predictors and binary response
#' @name SimData
#' @param N sample size
#' @param beta vector of coefficients for the informative predictors
#' @param noise the number of uninformative (noise) predictors
#' @param corr Logical, if FALSE the function generates uncorrelated predictors,
#' if TRUE  the correlation between predictors is 0.5 by default and the user can supply a different value in
#' the corr.effect argument.
#' @param corr.effect the correlation between informative predictors.
#' @return A data frame with N rows and p+1 columns, where p is the total number of informative and uninformative predictors.
#' The first column of the data frame is the binary response variable y.
#' @details The response y follows a Binomial distribution with probability= exp(X*beta)/(1+exp(X*beta))
#' @export
#' @examples
#' # simulate data with N=100 (sample size) and 9 predictors; 4 informative and 5 noise
#'
#' set.seed(14)
#' beta    <- c(3, 2, -1.6, -4)
#' noise <- 5
#' N     <- 100
#' simData <- SimData(N=N, beta=beta, noise=noise, corr=FALSE)
#'

SimData <- function(N, beta, noise, corr=TRUE, corr.effect=0.5){
  beta <- c(1,beta)

  if (corr==TRUE){
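    # compound-symmetry covariance: 1 on the diagonal, corr.effect everywhere else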
    sigma <- (diag((length(beta)-1))+corr.effect)- diag((length(beta)-1))*corr.effect
    X     <- mvtnorm::rmvnorm(N, mean = rep(0,(length(beta)-1)), sigma=sigma)
    X     <- cbind(rep(1,N), X)
    Xb    <- X %*% beta
    sigma2 <- (diag(noise)+corr.effect)- diag(noise)*corr.effect
    u     <- mvtnorm::rmvnorm(N, mean = rep(0,noise), sigma=sigma2)
  }
  if (corr==FALSE){
    X     <- matrix( stats::rnorm(N*(length(beta)-1)) , N, length(beta)-1)
    X     <- cbind(rep(1,N), X)
    Xb    <- X %*% beta
    u     <- matrix(stats::rnorm(N*noise), N, noise)
  }
  expit   <- function(x) { exp(x)/(1+exp(x))}
  prob    <- expit(Xb)
  y       <- stats::rbinom(N,1,prob)
  mydata  <- data.frame(y, X[,-1], u)
  names(mydata)[-1] <- c(paste0('X', 1:(length(beta)-1)), paste0('u', 1:noise))
  return(mydata)
}
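
# A quick sanity check of SimData (illustrative sketch, wrapped in if (FALSE)
# so it is never executed when the package is built): with corr=TRUE the
# off-diagonal sample correlations among the informative predictors should be
# close to corr.effect, and a plain logistic fit should roughly recover beta.
if (FALSE) {
  set.seed(14)
  simData <- SimData(N = 500, beta = c(3, 2, -1.6, -4), noise = 5, corr = TRUE)
  round(cor(simData[, paste0("X", 1:4)]), 2)            # off-diagonals near 0.5
  coef(glm(y ~ ., data = simData, family = "binomial")) # roughly c(3, 2, -1.6, -4) for X1..X4
}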


#' Objective function
#'
#' Objective (non-convex) function to minimize: objFun = -logL + lamda*CL, where CL = (1-w)*L0 + w*L1
#' @name objFun
#' @param x input matrix, of dimension nobs x nvars. Each row is an observation vector.
#' @param y binary response variable
#' @param lamda a tuning penalty parameter
#' @param w the weighting parameter for L1; then (1-w) is the weight for L0
#' @param beta coefficients
#' @param epsilon the continuity parameter
#' @return the value of the objective function evaluated at the given points.
#' @export
#' @examples
#'
#' set.seed(14)
#' beta    <- c(3, 2, -1.6, -1)
#' noise   <- 5
#' simData <- SimData(N=100, beta=beta, noise=noise, corr=FALSE)
#'
#' x  <- as.matrix(simData[,-1][,1])
#' y  <- as.matrix(simData$y)
#' betapoints <- seq(-2,2,0.01)
#'
#' lamda <- 1
#' w     <- 0.6
#' epsilon <- 0.1
#'
#' out <- numeric(length(betapoints))
#' for(i in 1:length(betapoints)){
#'  out[i]<- objFun(x, y, lamda=lamda, w=w, beta=betapoints[i], epsilon=epsilon)
#' }
#' plot(betapoints, out, type="l", ylab="objFun")
#'


objFun <- function(x, y, lamda, w, beta, epsilon) {

  # negative log-likelihood of the logistic model
  logL <- function(x, y, beta){
    loglik   <- - sum( -y*log(1 + exp(-(x%*%beta))) - (1-y)*log(1 + exp(x%*%beta)))
    return(loglik)
  }
  f1 <- logL(x, y, beta)

  # combined penalty CL = (1-w)*L0 + w*L1
  CL <- function(beta, w, epsilon) {
    # continuous relaxation of the L0 norm: each coefficient contributes
    # min(|beta_j|/epsilon, 1)
    L0 <- function(beta, epsilon) {
      L0_j <- function(beta_j, epsilon) {
        if (abs(beta_j) >= epsilon) 1 else abs(beta_j) / epsilon
      }
      sum(sapply(beta, L0_j, epsilon))
    }
    beta <- as.matrix(beta)
    # for a column vector the "1" (maximum column sum) norm equals the L1 norm
    (1-w) * L0(beta, epsilon) + w * norm(beta, type="1")
  }

  f  <- f1 + lamda*CL(beta, w, epsilon)
  return(f)
}
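
# A sketch of the continuous L0 relaxation used inside objFun (a restatement
# of L0_j above, wrapped in if (FALSE) so it never runs): each coefficient
# contributes min(|beta_j|/epsilon, 1), so the penalty approaches the exact
# nonzero count as epsilon shrinks.
if (FALSE) {
  L0approx <- function(beta, epsilon) sum(pmin(abs(beta) / epsilon, 1))
  beta <- c(0, 0.005, 0.5, -2)
  L0approx(beta, epsilon = 0.1)    # 2.05: the small coefficient counts only partially
  L0approx(beta, epsilon = 0.001)  # 3: matches the exact count sum(beta != 0)
}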



#' Stepwise forward variable selection using penalized regression.
#'
#' Stepwise forward variable selection based on the combination of L1 and L0 penalties.
#' The optimization is done using the "BFGS" method in stats::optim
#' @name  StepPenal
#' @param Data should have the following structure: the first column must be the binary response variable y.
#' @param lamda the tuning penalty parameter
#' @param standardize Logical flag for the predictors' standardization, prior to fitting the model.
#' Default is standardize=TRUE
#' @param w the weight parameter for the sum (1-w)L0+ wL1
#' @return a list with the shrunken coefficients and the names of the selected variables, i.e. those variables with
#' an estimated coefficient different from zero. It also returns the value of the objective function, evaluated at the
#' estimated coefficients.
#' @details The lamda and w parameters need to be tuned by cross-validation using stepPenal::tuneParam
#' @references Vradi E, Brannath W, Jaki T, Vonk R. Model selection based on combined penalties for biomarker
#' identification. Journal of biopharmaceutical statistics. 2018 Jul 4;28(4):735-49.
#' @seealso \code{\link[stats]{optim}}
#' @export
#' @examples
#' # use the StepPenal function on a simulated dataset, with given lamda and w.
#'
#' set.seed(14)
#' beta    <- c(3, 2, -1.6, -1)
#' noise   <- 5
#' simData <- SimData(N=100, beta=beta, noise=noise, corr=FALSE)
#' \dontrun{
#' before <- Sys.time()
#' stepPenal<- StepPenal(Data=simData, lamda=1.5, w=0.3)
#' after <- Sys.time()
#' after-before
#'
#' (varstepPenal<- stepPenal$coeffP)
#' }

StepPenal <- function(Data, lamda,  w, standardize=TRUE) {
  if (standardize==TRUE){
    Data<- data.frame(cbind(y=Data$y,scale(Data[,-1])))
  }else{
    Data<- Data
  }
  expit   <- function(x) { exp(x)/(1+exp(x))}
  epsilon<- 0.01
  z    <- colnames(Data)[-1]
  # selected variables by mod.AIC: sz
  sz <- NULL
  n <- length(z)
  for(i in 1:n) {
    res.AIC <- NULL
    res.beta<- list()
    for (j in 1:n) {
      #generate model formula
      if(i==1){
        mod.form <- stats::as.formula(paste0('y~',z[j]))
        var <- c(paste0('y~',z[j]))
      } else {
        mod.form <- stats::as.formula(paste0('y~',paste(c(sz, z[j]), collapse='+')))
        var <- c(paste0('y~', paste(c(sz, z[j]), collapse='+')))
      }
      y     <- as.matrix(Data$y)
      x     <- as.matrix(Data[,-1][,c(sz, z[j]) ])


      # local copy of the exported objFun (see its definition above)
      objFun <- function(x, y, lamda, w, beta, epsilon) {
        logL <- function(x, y, beta){
          loglik   <- - sum( -y*log(1 + exp(-(x%*%beta))) - (1-y)*log(1 + exp(x%*%beta)))
          return(loglik)
        }
        f1 <- logL(x, y, beta)

        CL <- function(beta, w, epsilon) {
          # continuous relaxation of the L0 norm
          L0 <- function(beta, epsilon) {
            L0_j <- function(beta_j, epsilon) {
              if (abs(beta_j) >= epsilon) 1 else abs(beta_j) / epsilon
            }
            sum(sapply(beta, L0_j, epsilon))
          }
          beta <- as.matrix(beta)
          (1-w) * L0(beta, epsilon) + w * norm(beta, type="1")
        }
        f  <- f1 + lamda*CL(beta, w, epsilon)
        return(f)
      }

      useoptim <-  tryCatch({
        stats::optim(par=c(rep(1,(length(z[j])+length(sz)))),
              fn=objFun,
              lamda=lamda, w=w, epsilon=epsilon, x=x, y=y,
              method="BFGS", control=list(maxit=5000))
              # lower= rep(-100,(length(z[j])+length(sz))),
              # upper= rep(100,(length(z[j])+length(sz))) )
      }, error = function(err) {
        return(NA)
      })
      suppressWarnings(if(!is.na(useoptim)){
        res.AIC <- rbind.data.frame(res.AIC,
                    data.frame(mod.AIC=useoptim$value, stringsAsFactors=FALSE))
        res.beta[[j]] <- round(useoptim$par,4)   # store per candidate j, not per round i
      } else{
        res.AIC <- rbind.data.frame(res.AIC,
                    data.frame(mod.AIC=c(999), stringsAsFactors=FALSE))
        res.beta[[j]] <- c(9999)
      })
    }
    # stop if no candidate in this round improves on the best AIC so far
    if(i>1 && !any(res.AIC$mod.AIC < best.AIC))
      break
    idx.min  <- which.min(res.AIC$mod.AIC)
    best.AIC <- res.AIC$mod.AIC[idx.min]
    sz       <- c(sz, z[idx.min])
    z        <- z[-idx.min]
    n        <- length(z)
    finalVar  <- sz
    finalBeta <- res.beta[[idx.min]]   # coefficients of the selected candidate
    fvalue <- best.AIC
    names(finalBeta) <- finalVar
  }
 return(list(VarP=finalVar, coeffP=finalBeta, value=fvalue))
}
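
# Illustrative follow-up (sketch, not run; assumes simData from the SimData
# example above): the named coefficient vector returned by StepPenal can be
# passed straight to findROC and penalBrier, defined later in this file.
if (FALSE) {
  fit <- StepPenal(Data = simData, lamda = 1.5, w = 0.3)
  findROC(simData, coeff = fit$coeffP)$auc   # in-sample AUC
  penalBrier(simData, coeffP = fit$coeffP)   # in-sample Brier score
}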



#' Stepwise forward variable selection using penalized regression.
#'
#' Stepwise forward variable selection based on the combination of L2 and L0 penalties.
#' The optimization is done using the "BFGS" method in stats::optim
#' @name  StepPenalL2
#' @param Data should have the following structure: the first column must be the binary response variable y.
#' @param lamda the tuning penalty parameter
#' @param standardize Logical flag for the predictors' standardization, prior to fitting the model.
#' Default is standardize=TRUE
#' @param w the weight parameter for the sum (1-w)L0+ wL2
#' @return a list with the shrunken coefficients and the names of the selected variables, i.e. those variables with
#' an estimated coefficient different from zero.
#' @details The lamda and w parameters need to be tuned by cross-validation using stepPenal::tuneParam
#' @seealso \code{\link[stats]{optim}}
#' @export
#' @examples
#' # use the StepPenal function on a simulated dataset, with given lamda and w.
#'
#' set.seed(14)
#' beta    <- c(3, 2, -1.6, -1)
#' noise   <- 5
#' simData <- SimData(N=100, beta=beta, noise=noise, corr=TRUE)
#' \dontrun{
#' before <- Sys.time()
#' stepPenalL2 <- StepPenalL2(Data=simData, lamda=1.5, w=0.6)
#' after <- Sys.time()
#' after-before
#'
#' (varstepPenal<- stepPenalL2$coeffP)
#' }

StepPenalL2<- function(Data, lamda,  w, standardize=TRUE) {
  if (standardize==TRUE){
    Data<- data.frame(cbind(y=Data[,1],scale(Data[,-1])))
  }else{
    Data<- Data
  }
  expit   <- function(x) { exp(x)/(1+exp(x))}

  objFun2 <- function(x, y, lamda, w, beta, epsilon) {
    logL <- function(beta, x, y){
      loglik   <- - sum( -y*log(1 + exp(-(x%*%beta))) - (1-y)*log(1 + exp(x%*%beta)))
      return(loglik)
    }

    L0 <- function(beta, epsilon) {
      p <- length(beta)
      L0_j <- function(beta_j, epsilon) {
        y <- NULL
        if (abs(beta_j) >= epsilon) {
          y <- 1
        } else {
          y <- abs(beta_j) / epsilon
        }
        return(y)
      }
      y <- sum(sapply(beta, L0_j, epsilon))
      return(y)
    }

    CL2 <- function(beta, w, epsilon) {
      beta <- as.matrix(beta)
      y <- (1-w) * L0(beta, epsilon) + w * norm(beta, type="f")
      return(y)
    }
    f1 <- logL(beta, x, y)
    f  <- f1 + lamda*CL2(beta, w, epsilon)
    return(f)
  }

  epsilon<- 0.01
  z    <- colnames(Data)[-1]
  # selected variables by mod.AIC: sz
  sz <- NULL
  n <- length(z)
  for(i in 1:n) {
    res.AIC <- NULL
    res.beta<- list()
    for (j in 1:n) {
      #generate model formula
      if(i==1){
        mod.form <- stats::as.formula(paste0('y~',z[j]))
        var <- c(paste0('y~',z[j]))
      } else {
        mod.form <- stats::as.formula(paste0('y~',paste(c(sz, z[j]), collapse='+')))
        var <- c(paste0('y~', paste(c(sz, z[j]), collapse='+')))
      }
      y     <- as.matrix(Data$y)
      x     <- as.matrix(Data[,-1][,c(sz, z[j]) ])

      useoptim <-  tryCatch({
        stats::optim(par=c(rep(1,(length(z[j])+length(sz)))),
              fn=objFun2,
              lamda=lamda, w=w, epsilon=epsilon, x=x, y=y,
              method="BFGS", control=list(maxit=5000))
              # lower= rep(-100,(length(z[j])+length(sz))),
              # upper= rep(100,(length(z[j])+length(sz))) )
      }, error = function(err) {
        return(NA)
      })
      suppressWarnings(if(!is.na(useoptim)){
        res.AIC <- rbind.data.frame(res.AIC,
                                    data.frame(mod.AIC=useoptim$value, stringsAsFactors=FALSE))
        res.beta[[j]] <- round(useoptim$par,4)   # store per candidate j, not per round i
      } else{
        res.AIC <- rbind.data.frame(res.AIC,
                                    data.frame(mod.AIC=c(999), stringsAsFactors=FALSE))
        res.beta[[j]] <- c(999)
      })
    }
    # stop if no candidate in this round improves on the best AIC so far
    if(i>1 && !any(res.AIC$mod.AIC < best.AIC))
      break
    idx.min  <- which.min(res.AIC$mod.AIC)
    best.AIC <- res.AIC$mod.AIC[idx.min]
    sz       <- c(sz, z[idx.min])
    z        <- z[-idx.min]
    n        <- length(z)
    finalVar  <- sz
    finalBeta <- res.beta[[idx.min]]   # coefficients of the selected candidate
    fvalue <- best.AIC
    names(finalBeta) <- finalVar
  }
  return(list(VarP=finalVar, coeffP=finalBeta, value=fvalue))
}


#' Variable selection based on the combined penalty CL= (1-w)L0 + wL1
#'
#' Methods to use for optimization include Hooke-Jeeves derivative-free minimization algorithm (hjk),
#' or the BFGS method (modified Quasi-Newton). This method does variable selection by shrinking
#' the coefficients towards zero using the combined penalty (CL= (1-w)L0 + wL1).
#'
#' @name  optimPenaLik
#' @param Data should have the following structure: the first column must be the response variable y
#' @param lamda tuning penalty parameter
#' @param w the weight parameter for the sum (1-w)L0+ wL1
#' @param standardize Logical flag for x variable standardization, prior to fitting the model sequence.
#' Default is standardize=TRUE
#' @param algorithms select between the BFGS ('QN') or the Hooke-Jeeves ('hjk') algorithm.
#' @details it is recommended to use the tuneParam function to tune the parameters lamda and w prior
#' to using the optimPenaLik function.
#' @seealso \code{\link[stats]{optim}}
#' @return a list with the shrunken coefficients and the names of the selected variables, i.e. those variables with
#' an estimated coefficient different from zero.
#' @export
#' @examples
#' # use the optimPenaLik function on a simulated dataset, with given lamda and w.
#' \dontrun{
#' set.seed(14)
#' beta    <- c(3, 2, -1.6, -1)
#' noise   <- 5
#' simData <- SimData(N=100, beta=beta, noise=noise, corr=TRUE)
#'
#' # use BFGS
#'
#' before  <- Sys.time()
#' PenalQN <- optimPenaLik(Data=simData, lamda=1.5, w=0.7,
#'                      algorithms=c("QN"))
#' (tot <- Sys.time()-before)
#' PenalQN
#'
#'
#'
#' # use Hooke-Jeeves algorithm
#'
#' before  <- Sys.time()
#' Penalhjk <- optimPenaLik(Data=simData, lamda=1.5, w=0.7,
#'                        algorithms=c("hjk"))
#' (totRun  <- Sys.time() - before)
#' # total run of approx 0.25sec
#'
#' Penalhjk
#'}
#'

optimPenaLik <-  function(Data, lamda, w, standardize=TRUE,
                          algorithms=c("QN","hjk") ){
  if (standardize==TRUE){
    Data<- data.frame(cbind(y=Data[,1],scale(Data[,-1])))
  }else{
    Data<- Data
  }
  epsilon <- 0.01

  #begin objFun
  objFun <- function(x, y, lamda, w, beta, epsilon) {
    logL <- function(x, y, beta){
      loglik   <- - sum( -y*log(1 + exp(-(x%*%beta))) - (1-y)*log(1 + exp(x%*%beta)))
      return(loglik)
    }
    f1 <- logL(x, y,beta)
    CL <- function(beta, w, epsilon) {
      # begin L0
      L0 <- function(beta, epsilon) {
        p <- length(beta)
        L0_j <- function(beta_j, epsilon) {
          y <- NULL
          if (abs(beta_j) >= epsilon) {
            y <- 1
          } else {
            y <- abs(beta_j) / epsilon
          }
          return(y)
        }
        y <- sum(sapply(beta, L0_j, epsilon))
        return(y)
      }
      # end L0
      beta <- as.matrix(beta)
      y <- (1-w) * L0(beta, epsilon) + w * norm(beta, type="1")
      return(y)
    }
    f  <- f1 + lamda*CL(beta, w, epsilon)
    return(f)
  }
  #end objFun

  y     <- as.matrix(Data$y)
  x     <- as.matrix(Data[,-1])
  out <- list()

  # ##  Optimization using Hooke-Jeeves #
  if("hjk" %in% algorithms){
    useoptimhjk <- tryCatch({
      dfoptim::hjk(par=c(rep(1,length(Data[,-1]))), fn=objFun,
          x=x, y=y , lamda=lamda, epsilon=epsilon, w=w)
      # lower= rep(-50, length(Data[,-1])),
      # upper= rep(50, length(Data[,-1])))
    }, error = function(err) {
      return(NA)
    })
    suppressWarnings(if(!is.na(useoptimhjk)){
      beta0   <- which(round(abs(useoptimhjk$par),3)>0.001)
      coefL2  <- useoptimhjk$par[abs(useoptimhjk$par)>0.001]
      varL2   <- colnames(Data)[-1][beta0]
      names(coefL2)<- varL2
      out <- c(out,list(varhjk=coefL2,value=useoptimhjk$value))
    }else {
      stop(paste("The optimization failed"))
    })
  }
  #
  ## Optimization using modified quasi-Newton #
  if("QN" %in% algorithms){
    useoptimQN <- tryCatch({
      stats::optim(fn=objFun,
            par=c(rep(1,length(Data[,-1]))),
            # lower=c(rep(-20,length(Data[,-1]))),
            # upper=c(rep(20,length(Data[,-1]))),
            x=x, y=y,
            method="BFGS", control=list(maxit=5000),
            lamda=lamda, epsilon=epsilon, w=w)
    }, error = function(err) {
      return(NA)
    })
    suppressWarnings(if(!is.na(useoptimQN)){
      betaQN  <- which(round(abs(useoptimQN$par),3)> 0.001)
      coefQN  <- round(useoptimQN$par[abs(useoptimQN$par)> 0.001],3)
      names(coefQN)<- colnames(Data[,-1][betaQN])
      out <- c(out,list(varQN= coefQN, valueQN=useoptimQN$value))
    }else {
      paste("The (QN) optimization failed and simulated annealing will be used instead")
      useoptimSA <- tryCatch({
        stats::optim(par=c(rep(1,length(Data[,-1]))),fn=objFun,
              x=x, y=y, method="SANN",
              lamda=lamda, epsilon=epsilon, w=w,
              control=list(maxit = 20000, temp = 800, tmax=800) )
      }, error = function(err) {
        return(NA)
      })
      suppressWarnings(if(!is.na(useoptimSA)){
        beta0   <- which(round(abs(useoptimSA$par),3)>0.001)
        coefL2  <- useoptimSA$par[abs(useoptimSA$par)>0.001]
        varL2   <- colnames(Data)[-1][beta0]
        names(coefL2)<- varL2
        out <- c(out,list(varSA=coefL2,value=useoptimSA$value))
      }else {
        paste("The (SA) optimization failed")
      })
    })
  }
  return(out)
}
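
# Both optimizers are local methods applied to a non-convex objective, so it
# is worth checking that they land on the same variable set. A hedged sketch
# (not run; assumes simData from the SimData example above):
if (FALSE) {
  fitQN  <- optimPenaLik(simData, lamda = 1.5, w = 0.7, algorithms = "QN")
  fithjk <- optimPenaLik(simData, lamda = 1.5, w = 0.7, algorithms = "hjk")
  names(fitQN$varQN)    # variables kept by BFGS
  names(fithjk$varhjk)  # variables kept by Hooke-Jeeves
}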


#' Variable selection based on the combined penalty CL2= (1-w)L0 + wL2
#'
#' Methods to use for optimization include the
#' Hooke-Jeeves derivative-free minimization algorithm (hjk),
#' and the BFGS method (modified Quasi-Newton). This algorithm does variable selection
#' by shrinking the coefficients towards zero using the combined penalty (CL2= (1-w)L0 + wL2).
#'
#' @name  optimPenaLikL2
#' @param Data should have the following structure: the first column must be the response variable y
#' @param lamda tuning penalty parameter
#' @param w the weight parameter for the sum (1-w)L0+ wL2
#' @param standardize Logical flag for x variable standardization, prior to fitting the model sequence.
#' Default is standardize=TRUE
#' @param algorithms select between the BFGS ('QN') or the Hooke-Jeeves ('hjk') algorithm.
#' @details it is recommended to use the tuneParamCL2 function to tune the parameters lamda and w prior
#' to using the optimPenaLikL2 function.
#' @return a list with the shrunken coefficients and the names of the selected variables, i.e. those variables with
#' an estimated coefficient different from zero.
#' @export
#' @examples
#' \dontrun{
#' # use the optimPenaLik function on a simulated dataset, with given lamda and w.
#' set.seed(14)
#' beta    <- c(3, 2, -1.6, -1)
#' noise   <- 5
#' simData <- SimData(N=100, beta=beta, noise=noise, corr=TRUE)
#'
#' # example with Quasi-Newton:
#' before <- Sys.time()
#' PenalQN <- optimPenaLikL2(Data=simData, lamda=2, w=0.6,
#'                      algorithms=c("QN"))
#' after <- Sys.time()
#' after-before
#' PenalQN
#'}
#'
optimPenaLikL2 <-  function(Data, lamda, w, standardize=TRUE,
                            algorithms=c("QN","hjk") ){
  if (standardize==TRUE){
    Data<- data.frame(cbind(y=Data[,1],scale(Data[,-1])))
  }else{
    Data<- Data
  }
  epsilon <- 0.01

  #begin objFun
  objFun2 <- function(x, y, lamda, w, beta, epsilon) {
    logL <- function(x, y, beta){
      loglik   <- - sum( -y*log(1 + exp(-(x%*%beta))) - (1-y)*log(1 + exp(x%*%beta)))
      return(loglik)
    }
    f1 <- logL(x, y,beta)
    ###
    CL <- function(beta, w, epsilon) {
      # begin L0
      L0 <- function(beta, epsilon) {
        p <- length(beta)
        L0_j <- function(beta_j, epsilon) {
          y <- NULL
          if (abs(beta_j) >= epsilon) {
            y <- 1
          } else {
            y <- abs(beta_j) / epsilon
          }
          return(y)
        }
        y <- sum(sapply(beta, L0_j, epsilon))
        return(y)
      }
      # end L0
      beta <- as.matrix(beta)
      y <- (1-w) * L0(beta, epsilon) + w * norm(beta, type="f")
      return(y)
    }
    ###
    f  <- f1 + lamda*CL(beta, w, epsilon)
    return(f)
  }
  #end objFun

  y     <- as.matrix(Data$y)
  x     <- as.matrix(Data[,-1])
  out <- list()

  # ##  Optimization using Hooke-Jeeves #
  if("hjk" %in% algorithms){
    useoptimhjk <- tryCatch({
      dfoptim::hjk(par=c(rep(1,length(Data[,-1]))),
          fn=objFun2,
          x=x, y=y , lamda=lamda, epsilon=epsilon, w=w)
      # lower= rep(-50, length(Data[,-1])),
      # upper= rep(50, length(Data[,-1])))
    }, error = function(err) {
      return(NA)
    })
    suppressWarnings(if(!is.na(useoptimhjk)){
      beta0   <- which(round(abs(useoptimhjk$par),3)>0.001)
      coefL2  <- round(useoptimhjk$par[abs(useoptimhjk$par)>0.001],3)
      varL2   <- colnames(Data)[-1][beta0]
      names(coefL2)<- varL2
      out <- c(out,list(varhjk=coefL2,value=useoptimhjk$value))
    }else {
      stop(paste("The optimization failed"))
    })
  }

  ## Optimization using quasi-Newton #
  if("QN" %in% algorithms){
    useoptimQN <- tryCatch({
      stats::optim(fn=objFun2,
            par=c(rep(1,length(Data[,-1]))),
            # lower=c(rep(-10,length(Data[,-1]))),
            # upper=c(rep(10,length(Data[,-1]))),
            x=x, y=y,
            method="BFGS", control = list(maxit=5000),
            lamda=lamda, epsilon=epsilon, w=w)
    }, error = function(err) {
      return(NA)
    })
    suppressWarnings(if(!is.na(useoptimQN)){
      betaQN  <- which(round(abs(useoptimQN$par),3)> 0.001)
      coefQN  <- round(useoptimQN$par[abs(useoptimQN$par)> 0.001],3)
      names(coefQN)<- colnames(Data[,-1][betaQN])
      out <- c(out,list(varQN= coefQN, valueQN=useoptimQN$value))
    }else {
      paste("The (QN) optimization failed and simulated annealing will be used instead")
      useoptimSA <- tryCatch({
        stats::optim(par=c(rep(1,length(Data[,-1]))),fn=objFun2,
              x=x, y=y, method="SANN",
              lamda=lamda, epsilon=epsilon, w=w,
              control=list(maxit = 20000, temp = 800, tmax=800) )
      }, error = function(err) {
        return(NA)
      })
      suppressWarnings(if(!is.na(useoptimSA)){
        beta0   <- which(round(abs(useoptimSA$par),3)>0.001)
        coefL2  <- useoptimSA$par[abs(useoptimSA$par)>0.001]
        varL2   <- colnames(Data)[-1][beta0]
        names(coefL2)<- varL2
        out <- c(out,list(varSA=coefL2,value=useoptimSA$value))
      }else {
        paste("The (SA) optimization failed")
      })

    })
  }
  return(out)
}


#'
#' Tune parameters w and lamda using the CL penalty
#'
#' Does k-fold cross-validation with the function optimPenaLik and returns
#' the values of lamda and w that maximize the area under the ROC curve.
#'
#' @name  tuneParam
#' @param Data a data frame; the first column must be the response variable y and the remaining columns the predictors
#' @param nfolds the number of folds used for cross-validation. Note that nfolds must be >= 2
#' @param grid a grid (data frame) of lamda and w values to be used for tuning
#' the model. It is created by expand.grid, see the example below
#' @param algorithm choose between BFGS ("QN") and the derivative-free Hooke-Jeeves method ("hjk") for the optimization
#' @return A data frame with the following: the average (over folds) cross-validated AUC, the total number of variables selected on the training set,
#' and the standard deviation of the AUC over the nfolds.
#' @details It supports the BFGS optimization method ('QN') from the optim {stats} function and the Hooke-Jeeves derivative-free
#' minimization algorithm ('hjk').
#' The values of lamda and w that yield the maximum AUC on the
#' cross-validation data set are selected.
#'
#' @export
#' @examples
#' \dontrun{
#' set.seed(14)
#' beta    <- c(3, 2, -1.6, -4)
#' noise   <- 5
#' simData <- SimData(N=100, beta=beta, noise=noise, corr=TRUE)
#'
#' nfolds  <- 3
#' grid <- expand.grid(w = c( 0.3, 0.7),
#'                    lamda = c(1.5))
#'
#' before <- Sys.time()
#' paramCV <- tuneParam(simData, nfolds, grid, algorithm=c("QN"))
#' (totalTime <- Sys.time() - before)
#'
#'
#' maxAUC    <- paramCV[which.max(paramCV$AUC),]$AUC
#' allmaxAUC <- paramCV[which(paramCV$AUC==maxAUC),] # checks whether the maximum AUC
#' # is unique; if it is not, take the combination of lamda and
#' # w where lamda has its largest value, thus achieving higher sparsity
#'
#' runQN   <- optimPenaLik(simData, lamda= allmaxAUC[nrow(allmaxAUC),]$lamda,
#'                          w= allmaxAUC[nrow(allmaxAUC),]$w,
#'                          algorithms=c("QN"))
#' (coefQN  <- runQN$varQN)
#'
#'
#' # check the robustness of the choice of lamda
#'
#' runQN2   <- optimPenaLik(simData, lamda= allmaxAUC[1,]$lamda,
#'                          w= allmaxAUC[1,]$w,
#'                          algorithms=c("QN"))
#' (coefQN2  <- runQN2$varQN)
#'
#' }

tuneParam <- function(Data, nfolds=nfolds, grid, algorithm=c("hjk", "QN")){
  if (nfolds < 2){
    stop(paste("nfolds =", nfolds, "; you need to supply nfolds with a value >= 2"))
  }

# begin function to calculate the CV AUC
  findAUC <- function(Data, valiData, coefPenal){
    #expit <- function(x) { exp(x)/(1+exp(x))}

    if (length(coefPenal)<1){
      aucpenalTr <- c(0)
      aucpenalV  <- c(0)
      out  <- list(aucpenalTr=aucpenalTr,
                   aucpenalV=aucpenalV)
    }else{
      aucTr <- tryCatch({
        Xmat    <- Data[,-1]
        Xpenal  <- Xmat[,names(coefPenal)]
      }, error = function(err) {
        return(NA)
      })
      suppressWarnings(if(!is.na(aucTr)){
        Xmat  <- Data[,-1]
        Xpenal    <- Xmat[,names(coefPenal)]
        Xbpenal   <- round(as.matrix(Xpenal)%*%coefPenal,4)
        aucpenalTr <- pROC::roc(Data[,1], c(Xbpenal))

        XpenalV   <- valiData[,-1][,names(coefPenal)]
        # XbpenalV  <- round(expit(as.matrix(XpenalV)%*%coefPenal),4)
        XbpenalV  <- round(as.matrix(XpenalV)%*%coefPenal,4)
        aucpenalV <- pROC::roc(valiData[,1], c(XbpenalV))

        # Results:
        out  <- list(aucpenalTr=aucpenalTr$auc,
                     aucpenalV=aucpenalV$auc,
                     senspenalV=aucpenalV$sensitivities,
                     specpenalV=aucpenalV$specificities)
      }else{
        out<- list(aucpenalTr=c(0),
                   aucpenalV=c(0),
                   senspenalV=c(0),
                   specpenalV=c(0))
      })
    }
    return(out)
  }
## end findAUC
  y     <- Data$y
  folds <- caret::createMultiFolds(y, k=nfolds, times=1)
  models <- vector("list", nrow(grid) )
  keepCV <- list()

  fun <- function(i){
    inTrain <- folds[[i]]
    trainData <- Data[inTrain,]
    cvData <- Data[-inTrain,]

    inloop <- lapply( 1:nrow(grid), function(p){
      modelQN <- optimPenaLik(trainData, lamda=grid$lamda[p],
                              w=grid$w[p],
                              algorithms= algorithm)
      if("hjk" %in% algorithm){
        coefModel <- modelQN$varhjk
      } else{
        coefModel <- modelQN$varQN
      }
      perfQN  <- findAUC(trainData, cvData, coefPenal=coefModel)

      modelCV <- data.frame(lamda= grid$lamda[p],
                            w= grid$w[p],
                            devQN= modelQN$value,
                            complex= length(coefModel),
                            aucQNTr= perfQN$aucpenalTr,
                            aucQNCV= perfQN$aucpenalV)
      models[[p]] <- modelCV
    })

    keepCV <- c(keepCV,list( models= inloop))
    return(keepCV)
  }

  outune <- lapply(1:length(folds),function(i) fun(i))

  fol0 <- lapply(1:nfolds, function(m) outune[[m]]$models)
  fol  <- lapply(fol0, function(x) {do.call(rbind, x)})
  tm   <- lapply(1:nfolds, function(i) { c(fol[[i]]$aucQNCV)})
  stepQNCV <- colMeans(do.call(rbind, tm))

  matr <- do.call(rbind, tm)
  sdAUC <- base::apply(matr, 2 , stats::sd)

  tVar   <- lapply(1:nfolds, function(i) { c(fol[[i]]$complex)})
  totVar <- base::colMeans(do.call(rbind, tVar))

  perfMat  <- data.frame(grid, AUC=round(stepQNCV,3),
                         sdAUC=sdAUC, totVar=totVar)

  return(perfMat)
}


#'
#' Tune parameters w and lamda using the CL2 penalty
#'
#' Performs nfolds repeated random training/validation splits (via caret::createDataPartition)
#' with the function optimPenaLikL2 and returns
#' the values of lamda and w that maximize the area under the ROC curve.
#'
#' @name  tuneParamCL2
#' @param Data a data frame; the first column must be the response variable y and the remaining columns the predictors
#' @param nfolds the number of folds used for cross-validation. Note that nfolds must be >= 2
#' @param grid a grid (data frame) of lamda and w values to be used for tuning
#' the model. It is created by expand.grid, see the tuneParam example
#' @param algorithm choose between BFGS ("QN") and the derivative-free Hooke-Jeeves method ("hjk") for the optimization
#' @return A data frame with the following: the average (over folds) cross-validated AUC, the total number of variables selected on the training set,
#' and the standard deviation of the AUC over the nfolds
#' @details It supports the BFGS optimization method ('QN') from the optim {stats} function and the Hooke-Jeeves derivative-free
#' minimization algorithm ('hjk').
#' The values of lamda and w that yield the maximum AUC on the
#' cross-validation data set are selected. If more than one combination of lamda and w yields the same AUC,
#' the largest values of lamda and w are chosen.
#' @export
#'
#'
tuneParamCL2 <- function(Data, nfolds=nfolds, grid, algorithm=c("QN")){
  if (nfolds < 2){
    stop(paste("nfolds =", nfolds, "; you need to supply nfolds with a value >= 2"))
  }

  # begin function to calculate the CV AUC
  findAUC <- function(Data, valiData, coefPenal){
    #expit <- function(x) { exp(x)/(1+exp(x))}

    if (length(coefPenal)<1){
      aucpenalTr <- c(0)
      aucpenalV  <- c(0)
      out  <- list(aucpenalTr=aucpenalTr,
                   aucpenalV=aucpenalV)
    }else{
      aucTr <- tryCatch({
        Xmat    <- Data[,-1]
        Xpenal  <- Xmat[,names(coefPenal)]
      }, error = function(err) {
        return(NA)
      })
      suppressWarnings(if(!is.na(aucTr)){
        Xmat  <- Data[,-1]
        Xpenal    <- Xmat[,names(coefPenal)]
        Xbpenal   <- round(as.matrix(Xpenal)%*%coefPenal,4)
        aucpenalTr <- pROC::roc(Data[,1], c(Xbpenal))

        XpenalV   <- valiData[,-1][,names(coefPenal)]
        # XbpenalV  <- round(expit(as.matrix(XpenalV)%*%coefPenal),4)
        XbpenalV  <- round(as.matrix(XpenalV)%*%coefPenal,4)
        aucpenalV <- pROC::roc(valiData[,1], c(XbpenalV))

        # Results:
        out  <- list(aucpenalTr=aucpenalTr$auc,
                     aucpenalV=aucpenalV$auc,
                     senspenalV=aucpenalV$sensitivities,
                     specpenalV=aucpenalV$specificities)
      }else{
        out<- list(aucpenalTr=c(0),
                   aucpenalV=c(0),
                   senspenalV=c(0),
                   specpenalV=c(0))
      })
    }
    return(out)
  }
  ## end findAUC
  y     <- Data$y
  folds <- caret::createDataPartition(y, times=nfolds, p=0.7)
  models <- vector("list", nrow(grid) )
  keepCV <- list()

  fun <- function(i){
    inTrain <- folds[[i]]
    trainData <- Data[inTrain,]
    cvData <- Data[-inTrain,]

    inloop <- lapply( 1:nrow(grid), function(p){
      modelQN <- optimPenaLikL2(trainData, lamda=grid$lamda[p],
                                w=grid$w[p],standardize = FALSE,
                                algorithms= algorithm)

      if("hjk" %in% algorithm){
        coefModel <- modelQN$varhjk
      } else{
        coefModel <- modelQN$varQN
      }
      perfQN  <- findAUC(trainData, cvData, coefPenal=coefModel)
      #BrierQN  <- stepPenalBrier(cvData, coeffP=coefModel)

      modelCV <- data.frame(lamda= grid$lamda[p],
                            w=grid$w[p],
                            devQN=modelQN$value,
                            complex=length(coefModel),
                            #BrierQN=BrierQN,
                            aucQNTr= perfQN$aucpenalTr,
                            aucQNCV= perfQN$aucpenalV)
      models[[p]] <- modelCV
    })

    keepCV <- c(keepCV,list( models= inloop))
    return(keepCV)
  }

  outune <- lapply(1:length(folds), function(i) fun(i))

  fol0 <- lapply(1:nfolds, function(m) outune[[m]]$models)
  fol  <- lapply(fol0, function(x) {do.call(rbind, x)})

  tm   <- lapply(1:nfolds, function(i) { c(fol[[i]]$aucQNCV)})
  stepQNCV <- base::colMeans(do.call(rbind, tm))
  matr <- do.call(rbind, tm)
  sdAUC <- base::apply(matr, 2 ,stats::sd)

  tVar   <- lapply(1:nfolds, function(i) { c(fol[[i]]$complex)})
  totVar <- colMeans(do.call(rbind, tVar))

  # brier   <- lapply(1:nfolds, function(i) { c(fol[[i]]$BrierQN)})
  # brierCV <- colMeans(do.call(rbind, brier))

  perfMat  <- data.frame(grid, AUC=round(stepQNCV,3),
                         sdAUC=round(sdAUC,3),
                         #brier=brierCV,
                         totVar=totVar)
  return(perfMat)
}
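
# tuneParamCL2 ships without a roxygen example; a minimal sketch mirroring the
# tuneParam example (not run; assumes simData from the SimData example above):
if (FALSE) {
  grid <- expand.grid(w = c(0.3, 0.7), lamda = c(1.5, 2))
  paramCL2 <- tuneParamCL2(simData, nfolds = 3, grid = grid, algorithm = "QN")
  paramCL2[which.max(paramCL2$AUC), ]   # best (lamda, w) combination by CV AUC
}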



#'
#' Stepwise forward variable selection based on the AIC criterion
#'
#' It is a wrapper function around the step function in the built-in stats package
#'
#' @name   stepaic
#' @param  Data a data frame; the first column should have the response variable y
#' @param standardize Logical flag for x variable standardization, prior to fitting the model sequence.
#'  Default is standardize=TRUE
#' @return a list with the coefficients of the final model.
#' It also returns the in-sample AUC and the Brier score
#' @seealso \code{\link[stats]{step}}
#' @export
#' @examples
#' \dontrun{
#' set.seed(14)
#' beta    <- c(3, 2, -1.6, -4)
#' noise   <- 5
#' simData <- SimData(N=100, beta=beta, noise=noise, corr=FALSE)
#'
#' stepaicfit <- stepaic(Data=simData)
#' stepaicfit
#' }


stepaic <- function(Data, standardize=TRUE){
  if (standardize==TRUE){
    Data<- data.frame(cbind(y=Data[,1], scale(Data[,-1])))
  }else{
    Data<- Data
  }
  ## Step AIC :
  null    <- stats::glm( factor(y) ~ 1 , data= Data, family="binomial")
  full    <- stats::glm( factor(y) ~ . , data= Data, family="binomial")
  fitaic  <- stats::step(null, scope= list(lower=null, upper=full), direction="forward", trace=F)
  coefaic <- stats::coef(fitaic)[-1]

  if(is.null(names(coefaic))==TRUE){
    print("stepAIC method selects zero variables")
    aucstepaicTr <- NA
    brierstepaic <- NA
  }else {
    # Xstepaic  <- Data[,-1][,names(coefaic)]
    # Xbstepaic <- round(as.matrix(Xstepaic)%*%coefaic,4)
    Xbstepaic    <- stats::predict(fitaic, type="response")
    aucstepaicTr <- pROC::roc(Data[,1], c(Xbstepaic))
    brierstepaic <- sum( (1/nrow(Data))*(Xbstepaic - Data[,1])^2)
  }
  outstep <- list(coefaic = stats::coef(fitaic)[-1],
                  #VarStepaic= names(coef(fitaic))[-1],
                  aucstepaicTr= aucstepaicTr,
                  brierstepaic= brierstepaic)
  return(outstep)
}


#'
#' Fits a lasso model and a lasso followed by a stepAIC algorithm.
#'
#' @name  lassomodel
#' @param  Data a data frame; the first column should have the response variable y
#' @param standardize Logical flag for variable standardization, prior to fitting the model.
#' Default is standardize=TRUE. If variables are in the same units already, you might not
#' wish to standardize.
#' @param measure loss to use for cross-validation. measure="auc" is for two-class logistic regression only,
#' and gives area under the ROC curve. measure="deviance", uses the deviance for logistic regression.
#' @param nfold number of folds - default is 5. Although nfolds can be as large as the sample size (leave-one-out CV),
#' it is not recommended for large datasets. Smallest value allowable is nfolds=3
#' @return a list with the coefficients in the final model for the lasso fit and also for the lasso followed by stepAIC.
#' @seealso \code{\link[glmnet]{glmnet}}
#' @details the function lassomodel is a wrapper function over glmnet::glmnet. The parameter lambda is tuned
#' by nfold-fold cross-validation with the glmnet::cv.glmnet function.
#' The selected lambda is the one that gives either the minimum deviance (measure="deviance"),
#' the maximum auc (measure="auc") or the minimum misclassification error (measure="class")
#' @export
#' @examples
#' \dontrun{
#' set.seed(14)
#' beta    <- c(3, 2, -1.6, -4)
#' noise   <- 5
#' simData <- SimData(N=100, beta=beta, noise=noise, corr=FALSE)
#'
#' lassofit <- lassomodel(Data=simData, measure="auc")
#' lassofit
#'
#' lassofit2 <- lassomodel(Data=simData, measure="deviance")
#' lassofit2
#'}
#'

lassomodel <- function(Data, standardize=TRUE, measure=c("deviance"), nfold=5){
  if(standardize==TRUE){
    Data <- data.frame(cbind(y=Data[,1],scale(Data[,-1])))
    ## Lasso fit:
    lassofit  <- glmnet::cv.glmnet(x=data.matrix(Data[,-1]),y=factor(Data$y), alpha=1,
                           standardize=FALSE, nfolds = nfold,
                           family="binomial", type.logistic="modified.Newton",
                           type.measure = measure)
  } else{
  lassofit  <- glmnet::cv.glmnet(x=data.matrix(Data[,-1]),y=factor(Data$y), alpha=1,
                         standardize=FALSE, nfolds = nfold,
                         family="binomial",type.measure=measure)
  }
    coeff     <- stats::coef(lassofit,lassofit$lambda.min)
    coefb     <- coeff@x[-1]
    variables <- names(which(coeff[,1][-1]!=0))
    names(coefb)<- variables

    datatopVar   <- cbind(y=Data[,1], Data[,variables])
    datatopVar   <- as.data.frame(datatopVar)
    nullasso     <- stats::glm( factor(y) ~ 1 , data= datatopVar, family="binomial")
    fullasso     <- stats::glm( factor(y) ~ . , data= datatopVar, family="binomial")
    lassostepaic <- stats::step(nullasso, scope=list(lower=nullasso, upper=fullasso),
                         direction="forward", trace=FALSE)

  out <- list(coeflasso= coefb,
              #VarlassoStepaic= names(coef(lassostepaic)[-1]),
              #varlasso= variables,
              minlamda=lassofit$lambda.min,
              coeflassostepaic= stats::coef(lassostepaic)[-1]
              )
  return(out)
}


#'
#' Evaluation of the performance of risk prediction models with binary status response variable.
#'
#' @name   penalBrier
#' @param  Data a data matrix; the first column should be the response variable y.
#' If you supply the training dataset it will calculate the in-sample Brier score.
#' @param  coeffP  a named vector of coefficients
#' @return  the Brier score (mean squared prediction error)
#' @details The Brier score is a measure of the classification performance of a binary classifier.
#' Its value ranges between 0 and 1, and the closer it is to 0 the better the classifier.
#' The area under the curve and the Brier score are often used together to summarize and compare performance.
#' @export
#' @references Brier, G. W. (1950). Verification of forecasts expressed in terms of probability.
#' Monthly Weather Review 78.
#' @examples
#' # use the penalBrier function on a simulated dataset, with given lamda and w.
#' \dontrun{
#' set.seed(14)
#' beta    <- c(3, 2, -1.6, -4)
#' noise   <- 5
#' simData <- SimData(N=100,beta=beta, noise=noise, corr=FALSE)
#'
#' before   <- Sys.time()
#' stepPenal<- StepPenal(Data=simData, lamda=1.2, w=0.4)
#' (totRun  <- Sys.time() - before)
#'
#' (coeff<- stepPenal$coeffP)
#'  me <- penalBrier(simData,coeff)
#' }

penalBrier <- function(Data, coeffP){
  if(is.null(names(coeffP))==TRUE){
    stop("coeffP must be a named vector; no names were found.")
  }else {
    expit   <- function(x) { exp(x)/(1+exp(x))}
    Xpenal     <- Data[,-1][,names(coeffP)]
    Xbpenal    <- round(expit(as.matrix(Xpenal)%*%coeffP),4)
    brierpenal <- sum( (1/nrow(Data))*(Xbpenal-Data[,1])^2)
  }
  return(brierpenal)
}
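
# A useful reference point (sketch, not run): a constant prediction of 0.5
# scores (0.5 - y)^2 = 0.25 on every binary outcome, so a fitted model should
# achieve a Brier score clearly below 0.25 to be useful.
if (FALSE) {
  y <- rbinom(100, 1, 0.5)
  mean((0.5 - y)^2)   # exactly 0.25
}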


#' Compute the area under the ROC curve
#'
#' This function computes the numeric value of the area under the ROC curve (AUC) with the trapezoidal rule.
#' It is a wrapper function around the roc function in the pROC package
#'
#' @name   findROC
#' @param  Data a data matrix; the first column should be the binary response variable y.
#' If you supply the training dataset it will calculate the in-sample AUC.
#' If supplied with a new dataset it will return the predictive AUC.
#' @param  coeff vector of coefficients
#' @return The area under the ROC curve, the sensitivity and specificity
#' @seealso \code{\link[pROC]{roc}}
#' @export
#' @examples
#' \dontrun{
#' set.seed(14)
#' beta    <- c(3, 2, -1.6, -4)
#' noise   <- 5
#' simData <- SimData(N=100,beta=beta, noise=noise, corr=FALSE)
#'
#' stepPenal<- StepPenal(Data=simData, lamda=1.2, w=0.7)
#'
#' (coeffP <- stepPenal$coeffP)
#'
#' findROC(simData, coeff=coeffP)
#' }

findROC  <- function(Data, coeff){
  if(is.null(names(coeff))==TRUE){
    print("There were zero variables")
    findAUC <- NA
  }else {
    expit   <- function(x) { exp(x)/(1+exp(x))}
    Xmat     <-  Data[,-1]
    Xpenal   <- Xmat[,names(coeff)]
    Xbpenal  <- round(expit(as.matrix(Xpenal)%*%coeff),4)
    findAUC <-  pROC::roc(Data[,1], c(Xbpenal))
  }
  return(findAUC)
}
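
# The trapezoidal AUC returned by pROC::roc coincides with the Mann-Whitney
# rank statistic P(score_pos > score_neg) + 0.5 * P(tie), which is easy to
# verify by hand on a toy example (sketch, not run):
if (FALSE) {
  score <- c(0.1, 0.4, 0.35, 0.8)
  y     <- c(0, 0, 1, 1)
  pROC::roc(y, score)$auc   # 0.75
  cmp <- outer(score[y == 1], score[y == 0], ">") +
         0.5 * outer(score[y == 1], score[y == 0], "==")
  mean(cmp)                 # 0.75 as well
}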
