R/ecv.R
In EnsembleCV: Extensible Package for Cross-Validation-Based Integration of Base Learners

Documented in ecv.load ecv.regression ecv.regression.baselearner.control ecv.regression.integrator.control ecv.save plot.ecv.regression predict.ecv.regression

ecv.regression.baselearner.control <- function(baselearners=c("nnet", "rf", "svm", "gbm", "knn", "penreg")
  , baselearner.configs=make.configs(baselearners, type="regression"), npart=1, nfold=5) {
    return (list(configs=baselearner.configs, npart=npart, nfold=nfold))
}

ecv.regression.integrator.control <- function(errfun=rmse.error, method=c("default")) {
  return (list(errfun=rmse.error, method=method))
}

ecv.set.filemethod <- function(formula, data, instance.list, type="regression") FALSE # TODO: to be implemented

ecv.regression <- function(formula, data
  , baselearner.control=ecv.regression.baselearner.control()
  , integrator.control=ecv.regression.integrator.control()
  , ncores=1, filemethod=FALSE, print.level=1
  , preschedule = TRUE
  , schedule.method = c("random", "as.is", "task.length"), task.length) {
  if (integrator.control$method!="default") stop("invalid CV integration method")
  ncores.max <- try(detectCores(),silent=T)
  mycall <- match.call()
  if (!inherits(ncores.max,"try-error")) ncores <- min(ncores,ncores.max)
  if (print.level>=1 && ncores>1) cat("running in parallel mode, using", ncores, "cores\n")

  # training a batch of baselearners
  partitions.bl <- generate.partitions(baselearner.control$npart, nrow(data), baselearner.control$nfold)
  my.instance.list <- make.instances(baselearner.control$configs, partitions.bl)

  # TODO: determining of base learner estimation objects must be saved to disk or not
  if (missing(filemethod)) filemethod <- ecv.set.filemethod(formula, data, my.instance.list)
  
  if (print.level>=1) cat("CV training of base learners...\n")
  est.baselearner.cv.batch <- Regression.CV.Batch.Fit(my.instance.list, formula, data, ncores=ncores, filemethod=filemethod, print.level=print.level
                                                      , preschedule = preschedule, schedule.method = schedule.method, task.length = task.length)
  if (print.level>=1) cat("finished CV training of base learners\n")
  Xcv <- est.baselearner.cv.batch@pred
  y <- data[,all.vars(formula)[1]] # TODO: more robust way of extracting y (here and inside base learner functions)
  
  # trainig on full dataset is needed for a subset of methods
  if (print.level>=1) cat("full training of base learners...\n")
  #Regression.Batch.Fit <- function(config.list, formula, data, ncores=1, filemethod=FALSE, print.level=1)
  est.baselearner.batch <- Regression.Batch.Fit(baselearner.control$configs, formula, data, ncores=ncores, filemethod=filemethod, print.level=print.level)
  if (print.level>=1) cat("finished full training of base learners\n")
  Xfull <- est.baselearner.batch@pred

  my.integrator.config <- Regression.Select.MinAvgErr.Config(errfun=integrator.control$errfun, instance.list=my.instance.list)
  est.integrator <- Regression.Select.Fit(my.integrator.config, X=Xcv, y=y, print.level=print.level)
  #pred <- predict(est.integrator, Xnew=Xfull, config.list=baselearner.control$configs)
  pred <- est.integrator@pred
  
  ret <- list(call=mycall, formula=formula, instance.list=my.instance.list, integrator.config=my.integrator.config, method=integrator.control$method
    , est=list(baselearner.cv.batch=est.baselearner.cv.batch, baselearner.batch=est.baselearner.batch, integrator=est.integrator)
    , y=y, pred=pred, filemethod=filemethod)
  class(ret) <- "ecv.regression"
  if (filemethod) class(ret) <- c(class(ret), "ecv.file")
  return (ret)
}

print.ecv.regression <- function(x, ...) {
  cat("Call:\n")
  print(x$call)
}

predict.ecv.regression <- function(object, newdata=NULL, ncores=1, ...) {
  if (is.null(newdata)) return (object@pred)
  if (object$method=="default") {
    newpred.baselearner.batch <- predict(object$est$baselearner.batch, newdata, ncores=ncores, ...)
    newpred <- predict(object$est$integrator, Xnew=newpred.baselearner.batch, config.list=object$est$baselearner.batch@config.list, ...)
  } else {
    stop("invalid CV integration method")
  }
  return (newpred)
}

summary.ecv.regression <- function(object, ...) {
  #summary.baselearner <- summary(object$est$baselearner.cv.batch, errfun=object$integrator.config@errfun) # not implemented yet in EnsembleBase
  summary.baselearner <- NULL
  summary.integrator <- summary(object$est$integrator)
  ret <- list(baselearner=summary.baselearner, integrator=summary.integrator)
  class(ret) <- "summary.ecv.regression"
  return (ret)
}

print.summary.ecv.regression <- function(x, ...) {
  #cat("### baselearner summary ###\n")
  #print(x$baselearner)
  cat("### integrator summary ###\n")
  print(x$integrator)
}

plot.ecv.regression <- function(x, ...) {
  errfun <- x$integrator.config@errfun 
  error <- errfun(x$pred, x$y)
  #plot(x$est$baselearner.batch, errfun=x$integrator.config@errfun)
  plot(x$est$baselearner.cv.batch, errfun=x$integrator.config@errfun)
  abline(h=error, lty=2)
  #plot(x$est$baselearner.cv.batch, errfun=x$integrator.config@errfun)
  #abline(h=est$est$integrator@est$error.opt, lty=2)
  #cat("full error:", error, "\n")
  #cat("cv error:", est$est$integrator@est$error.opt, "\n")
}

## determine if save and load methods are generic enough to be applicable to ALL integrators
## if yes, we should move these functions to EnsembleBase to make them available to other derivative packages

ecv.save <- function(obj, file) {
  if (!("ecv.regression" %in% class(obj))) 
    stop("invalid object class (must be ecv.regression)")
  if (missing(file)) stop("must provide file argument")
  tmpfiles <- obj$est$baselearner.cv.batch@tmpfiles # obtain names of tmp files under cv.batch learners
  tmpfiles.full <- obj$est$baselearner.batch@tmpfiles # obtain names of tmp files under full batch learners
  
  if (is.null(tmpfiles)) { # ordinary save
    save(obj, file = file)
  } else {
    tmpfile.new <- tempfile() # create new tmp file name
    save(obj, file=tmpfile.new, compress=F) # save estimation object to tmp file
    all.files <- c(tmpfile.new, tmpfiles, tmpfiles.full) # consolidate names of tmp files for object as well as tmp files for cv and full batch learners
    all.files.basename <- basename(all.files) # extract basenames of all tmp files to be saved
    
    tmpdir <- paste0("./.", basename(tempfile("dir")),"/") # define a temp directory
    dir.create(tmpdir) # create the tmp directory
    all.files.new <- paste0(tmpdir, all.files.basename) # construct full path to all files
    file.copy(all.files, all.files.new) # copy files to tmp directory
    meta <- list(filename.mainobj=all.files.basename[1], filenames.batchobj=all.files.basename[1+1:length(tmpfiles)]
                 , filenames.batchobj.full=1+length(tmpfiles)+1:length(tmpfiles.full)) # create a list of meta-data
    save(meta, file=paste0(tmpdir, "meta"), compress=FALSE) # save meta-data list to tmp directory, under file "meta"
  
    tar(file, files=tmpdir, compression="gzip") # write content of "tmpdir" to "file"
    unlink(tmpdir, recursive=TRUE)
  }
}

ecv.load <- function(file) {
  env <- new.env()
  loadret <- suppressWarnings(try(load(file, envir = env), silent = TRUE))
  
  if (inherits(loadret, "try-error")) { # filemethod load
    filepaths <- untar(file, list=T)
    basenames <- basename(filepaths)
    dirnames <- dirname(filepaths)
    if (length(unique(dirnames))>1) stop("unexpected multiple directories in tar filepaths")
    metafile.index <- which(basenames=="meta")
    
    extdir <- dirnames[1] # this is where untar will extract the files to
    untar(file)
    meta <- NULL # to overcome codetools error: "no visible binding for meta"
    load(filepaths[metafile.index]) # this will load "meta"
    mainfile.index <- which(basenames==meta$filename.mainobj)
    load(filepaths[mainfile.index]) # this will load "obj"
    if (!identical(class(obj),c("ecv.regression","ecv.file"))) stop("invalid object class (must be ecv.regression & ecv.file)") # extend later to allow for non-regression models
    
    basenames.ordered <- basename(obj$est$baselearner.cv.batch@tmpfiles)
    basenames.ordered.full <- basename(obj$est$baselearner.batch@tmpfiles)
    #if (!identical(sort(basenames.ordered),sort(basenames[-c(metafile.index,mainfile.index)]))) stop("basenames mismatch")
    filepaths.ordered <- paste(extdir, basenames.ordered, sep="/")
    filepaths.ordered.full <- paste(extdir, basenames.ordered.full, sep="/")
    
    # copy batch files to new tempfiles in tempdir
    tmpfiles.new <- tempfile(rep("file", length(filepaths.ordered)))
    # replaced file.rename with file.copy and unlink to handle cross-device cases (where . and R tmp directories are on different devices)
    file.copy(from=filepaths.ordered, to=tmpfiles.new)
    unlink(filepaths.ordered, recursive=TRUE)
    
    tmpfiles.new.full <- tempfile(rep("file", length(filepaths.ordered.full)))
    # replaced file.rename with file.copy and unlink to handle cross-device cases (where . and R tmp directories are on different devices)
    file.copy(from=filepaths.ordered.full, to=tmpfiles.new.full)
    unlink(filepaths.ordered.full, recursive=TRUE)
  
    unlink(extdir, recursive=TRUE)
    
    obj$est$baselearner.cv.batch@tmpfiles <- tmpfiles.new
    n.instance <- length(obj$est$baselearner.cv.batch@instance.list@instances)
    for (i in 1:n.instance) {
      partid <- obj$est$baselearner.cv.batch@instance.list@instances[[1]]@partid
      nfold <- length(unique(obj$est$baselearner.cv.batch@instance.list@partitions[,partid]))
      for (j in 1:nfold) {
        obj$est$baselearner.cv.batch@fitobj.list[[i]]@fitobj.list[[j]]@est <- tmpfiles.new[obj$est$baselearner.cv.batch@tmpfiles.index.list$start[i]+j-1]
      }
    }
    
    obj$est$baselearner.batch@tmpfiles <- tmpfiles.new.full
    n.config <- length(obj$est$baselearner.batch@config.list)
    for (i in 1:n.config) {
      obj$est$baselearner.batch@fitobj.list[[i]]@est <- tmpfiles.new.full[i]
    }
    
    return (obj)
  } else { # ordinary load
    loadedObjects <- objects(env, all.names = TRUE)
    stopifnot(length(loadedObjects) == 1)
    return (env[[loadedObjects]])
  }
}

Any scripts or data that you put into this service are public.

EnsembleCV documentation built on April 4, 2025, 3:50 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

EnsembleCV
Extensible Package for Cross-Validation-Based Integration of Base Learners

R/ecv.R
In EnsembleCV: Extensible Package for Cross-Validation-Based Integration of Base Learners

Defines functions ecv.load ecv.save plot.ecv.regression print.summary.ecv.regression summary.ecv.regression predict.ecv.regression print.ecv.regression ecv.regression ecv.set.filemethod ecv.regression.integrator.control ecv.regression.baselearner.control

Documented in ecv.load ecv.regression ecv.regression.baselearner.control ecv.regression.integrator.control ecv.save plot.ecv.regression predict.ecv.regression

Try the EnsembleCV package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EnsembleCV Extensible Package for Cross-Validation-Based Integration of Base Learners

R/ecv.R In EnsembleCV: Extensible Package for Cross-Validation-Based Integration of Base Learners

Defines functions ecv.load ecv.save plot.ecv.regression print.summary.ecv.regression summary.ecv.regression predict.ecv.regression print.ecv.regression ecv.regression ecv.set.filemethod ecv.regression.integrator.control ecv.regression.baselearner.control

Documented in ecv.load ecv.regression ecv.regression.baselearner.control ecv.regression.integrator.control ecv.save plot.ecv.regression predict.ecv.regression

Try the EnsembleCV package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EnsembleCV
Extensible Package for Cross-Validation-Based Integration of Base Learners

R/ecv.R
In EnsembleCV: Extensible Package for Cross-Validation-Based Integration of Base Learners