# R/cv.cLearn.R
#
# Defines functions: loss.score, cv.cLearn

# cv.cLearn uses cross-validation to tune lambda
# Tune lambda for cLearn via nfolds-fold cross-validation.
#
# Args:
#   x: design matrix (n x p).
#   y: binary response with exactly two classes; recoded internally to -1/1
#      (first factor level -> -1, second -> 1).
#   lambdaSeq: optional lambda sequence; if NULL it is taken from the
#      full-data cLearn fit.
#   weight: per-column penalty factors (length NCOL(x)).
#   lossType: 'logistic' delegates to glmnet::cv.glmnet; any other value
#      uses cLearn with the corresponding loss.
#   nlambda, ratio, tol, maxIter: forwarded to cLearn.
#   nfolds: number of CV folds; must be at least 3 (5 recommended).
#   parallel: if TRUE, fit folds in parallel via foreach/doParallel when
#      those packages are available; otherwise folds run sequentially.
#   intercept, offset: forwarded to glmnet for the logistic case.
#   ...: further arguments forwarded to cLearn and loss.score.
#
# Returns: an object of class "cv.cLearn" — a list with cvm, cvsd, cvup,
#   cvlo, cvraw, fit (full-data fit), lambda.seq, and lambda.opt (the
#   lambda minimizing the mean CV loss).
cv.cLearn <- function(x, y, lambdaSeq = NULL, weight = rep(1, NCOL(x)), lossType='logistic', nlambda = 100, nfolds=5, parallel = TRUE,ratio  = 10/11, tol = 1e-4, maxIter = 10^3, intercept=FALSE, offset=NULL, ...){
  # check whether there are exactly 2 classes
  ylev <- levels(factor(y))
  stopifnot(length(ylev) == 2)
  # validate nfolds BEFORE it is used to assign folds; the message now
  # matches the condition (the old one said "bigger than 3" but allowed 3)
  if (nfolds < 3) stop("nfolds must be at least 3; nfolds=5 recommended")
  nobs <- length(y)
  # recode y to -1/1 by factor level (first level -> -1, second -> 1);
  # unlike comparing as.numeric(levels(...)), this also works when the
  # class labels are non-numeric, and gives identical results otherwise
  y <- ifelse(factor(y) == ylev[1], -1, 1)

  # use glmnet's cross-validation for the logistic loss
  if (lossType=='logistic'){
    fit <- glmnet::cv.glmnet(x=x, y=y, family='binomial', penalty.factor=weight, intercept = intercept, parallel = parallel, offset = offset)
    # expose coefficients under the name downstream code expects
    fit$glmnet.fit$coef <- fit$glmnet.fit$beta
    out <- list(cvm=fit$cvm, cvsd=fit$cvsd, cvup=fit$cvup,
                cvlo=fit$cvlo, cvraw = NULL, fit=fit$glmnet.fit,
                lambda.seq=fit$lambda, lambda.opt=fit$lambda.min)
    class(out) <- "cv.cLearn"
    return(out)
  }

  # fit on all data (also determines lambdaSeq when it was NULL)
  fit <- cLearn(x=x, y=y, lambdaSeq = lambdaSeq, weight = weight, lossType = lossType, nlambda = nlambda, ratio = ratio, tol = tol, maxIter = maxIter, ...)
  lambdaSeq <- fit$lambda

  # randomly assign each observation to one of the nfolds folds
  foldid <- sample(rep(seq_len(nfolds), length.out = nobs))

  # fit cLearn on the training portion of fold i (everything NOT in fold i)
  fit_fold <- function(i) {
    test_idx <- foldid == i
    cLearn(x = x[!test_idx, , drop = FALSE], y = y[!test_idx],
           lambdaSeq = lambdaSeq, weight = weight, lossType = lossType,
           nlambda = nlambda, ratio = ratio, tol = tol, maxIter = maxIter, ...)
  }

  # use namespace-qualified calls instead of require()/library() side
  # effects; fall back to the sequential path when either package is absent
  if (parallel && requireNamespace("foreach", quietly = TRUE) &&
      requireNamespace("doParallel", quietly = TRUE)) {
    n_cores <- parallel::detectCores(all.tests = FALSE, logical = TRUE)
    cl <- parallel::makeCluster(min(n_cores, nfolds))
    # release the workers even if a fold errors
    on.exit(parallel::stopCluster(cl), add = TRUE)
    doParallel::registerDoParallel(cl)
    `%dopar%` <- foreach::`%dopar%`
    outlist <- foreach::foreach(i = seq_len(nfolds),
                                .packages = c("concordLearn")) %dopar% {
      fit_fold(i)
    }
  } else {
    outlist <- lapply(seq_len(nfolds), fit_fold)
  }

  # per-fold mean held-out loss, one column per lambda
  cvraw <- array(0, c(nfolds, length(lambdaSeq)))
  for (i in seq_len(nfolds)) {
    cvraw[i, ] <- loss.score((foldid == i), outlist[[i]], x, y, lossType = lossType, ...)
  }

  cvm <- apply(cvraw, 2, mean, na.rm = TRUE)
  cvsd <- apply(cvraw, 2, sd, na.rm = TRUE)

  out <- list(cvm = cvm, cvsd = cvsd, cvup = cvm + cvsd,
              cvlo = cvm - cvsd, cvraw = cvraw, fit = fit,
              lambda.seq = lambdaSeq, lambda.opt = lambdaSeq[which.min(cvm)])
  class(out) <- "cv.cLearn"
  out
}

# set loss.score
# Mean held-out loss for one cross-validation fold.
#
# Args:
#   testid: logical vector marking the held-out (test) observations.
#   fit: a cLearn fit whose $coef matrix has one column per lambda.
#   x, y: the full design matrix and the -1/1 response.
#   lossType: loss to evaluate; forwarded to loss() with order = 0.
#   ...: further arguments forwarded to loss().
#
# Returns: numeric vector of mean test-set losses, one per lambda.
loss.score <- function(testid, fit, x, y, lossType, ...){
  coef <- fit$coef
  # held-out data; the originals were misleadingly named *.train even
  # though testid selects the TEST fold. drop = FALSE keeps x.test a
  # matrix when the fold contains a single observation.
  x.test <- x[testid, , drop = FALSE]
  y.test <- y[testid]
  # mean loss of the margins y * x %*% beta for each lambda's coefficients
  # (beta instead of `t`, which shadowed base::t)
  apply(coef, 2, function(beta){
    mean(loss(y.test * x.test %*% beta, lossType = lossType, order = 0, ...))
  })
}
# muxuanliang/concordLearn documentation built on Dec. 21, 2021, 11:04 p.m.