R/crossvalidations/mx2bcv.R

Defines functions mx2bcv.Generator mx2bcv.validation standardVFCV

# 块正则化的mx2交叉验证方法。
#
# 直接基于正交表来构造块正则化的mx2交叉验证切分集合。
#
# Author: Wang Ruibo
# E-mail: wangruibo@sxu.edu.cn
# Date: XXXX/XX/XX
# 修改记录:
#   wrb@2017/11/8: 规范切分集合的取法,基于partitions取出切分集合.
#                  交叉验证全为从数据开始取出维度,摒弃n参数。
#   wrb@2019/05/11: 加入分层mx2bcv的切分方式,该切分仅对分类数据有效。

mx2bcv.Generator <- function(cvConf){
  # 给定m,给出数据指标集合的块正则化mx2交叉验证的切分集合。
  if(is.null(cvConf$data)) stop("Please specify cv$data.")
  n <- nrow(cvConf$data)
  m <- cvConf$m
  if (m >= 143 )  stop("the repitition count is out of range.")  
  if (n< m + 1 )  stop("repitition count cannot larger than n-1")
  nruns = m+(4-m%%4)
  plan <- oa.design(nruns = nruns, nfactors = nruns-1, nlevels = 2)
  oa<-matrix(t(as.numeric(as.matrix(plan))),nruns)
  orthArray <- data.frame((oa-1.5)*2)
  orthArray <- orthArray[, 1:m]
  blocks<- standardVFCV(n, 1, nrow(orthArray[1]), cvConf)
  mgroupsCVs<-vector("list",m)
  for(i in 1:m){
    split<-orthArray[i]       
    cvs<-vector("list",2)
    for(b in 1:nrow(split)){      
      if(split[b,1]==1){        
        if(is.null(cvs[[1]])){
          cvs[[1]]<-blocks[[b]]
        }else{
          cvs[[1]]<-cbind(t(as.numeric(cvs[[1]])),t(as.numeric(blocks[[b]])))
        }
      }else{
        if(is.null(cvs[[2]])){
          cvs[[2]]<-blocks[[b]]
        }else{
          cvs[[2]]<-cbind(t(as.numeric(cvs[[2]])),t(as.numeric(blocks[[b]])))
        }
      }
    }
    cvs[[1]]<-sort(cvs[[1]])
    cvs[[2]]<-sort(cvs[[2]])
    mgroupsCVs[[i]]<-cvs
  }
  mx2bcv <- list()
  mx2bcv[["partitions"]] <- vector("list", m * 2)
  for(j in 1:m) {
    cvsplit <- mgroupsCVs[[j]]
    mx2bcv[["partitions"]][[2*(j-1)+1]] <- list(cvsplit[[1]], cvsplit[[2]])
    mx2bcv[["partitions"]][[2*(j-1)+2]] <- list(cvsplit[[2]], cvsplit[[1]])
  }
  return(mx2bcv)
}

mx2bcv.Prepackages <- c("DoE.base")

mx2bcv.validation <- function(cvConf) {
  if(is.null(cvConf$m) ) {
    return(FALSE)
  }
  if(is.null(cvConf$n)) {
    warning("n is not given.")
  }
  if(is.null(cvConf$stratified)) {
    warning("stratified is set be false in default.")
  }   
  return(TRUE)
}


standardVFCV <- function(n, n1, v, cvConf) {
  ngroup <- v  
  ngroup <- trunc(ngroup)
  if( ngroup < 2){
    stop ("folds count is less than 2.")
  }
  if(ngroup > n){
    stop ("folds count is larger than n.")
  }
  data.set <- cvConf$data
  y <- data.set[,ncol(data.set)]
  stratified <- cvConf$stratified
  if(is.null(stratified)) {
    stratified <- F
  }
  groups <- vector("list",ngroup)
  if(stratified == F) {
    o <- sample(1:n)
    for(j in 1:n){
      jj <- (j-1) %% ngroup + 1
      cnt <- trunc((j-1) / ngroup)
      groups[[jj]][cnt + 1] <- o[j]
    }
  } else {
    fctr <- c(levels(factor(y)))
    fctr_count <- length(fctr)
    indexArrays <- vector("list", fctr_count)
    orderedArray <- c()
    for(fctr_idx in 1:fctr_count) {
      orderedArray <- c(orderedArray, which(y ==  fctr[fctr_idx:fctr_idx]))
    }  
    rep_count <- ceiling(length(y)/v)
    for(v_idx in 1:v) {
      bool_v <- rep(FALSE, v)
      bool_v[v_idx] <- TRUE
      bool_vec <- rep(bool_v, rep_count)
      bool_vec <- bool_vec[1:length(y)]
      groups[[v_idx]] <- orderedArray[which(bool_vec)]
    }
  }
  
  for(gIdx in 1:v)
    groups[[gIdx]]=sort(groups[[gIdx]])
  return(groups)  
}
RamboWANG/RegularizedCrossValidation documentation built on Oct. 10, 2019, 5:55 a.m.