R/04GridClass.R

#' @include 03PreprocessorClass.R
NULL

## REPORT THE OUTCOME OF DATA VALIDATION

reportexitstatus <- function(preprocesseddatasets){

  if(class(preprocesseddatasets)!="list"){stop("Argument 'preprocesseddatasets' to function 'reportexitstatus' must of a list.")}

  variance <- unlist(lapply(preprocesseddatasets, function(x) slot(x, "variance")))
  finite <- unlist(lapply(preprocesseddatasets, function(x) slot(x, "finite")))
  completeobs <- unlist(lapply(preprocesseddatasets, function(x) slot(x, "completeobs")))
  classbalance <- unlist(lapply(preprocesseddatasets, function(x) slot(x, "classbalance")))
  ntopratiotwoplus <- unlist(lapply(preprocesseddatasets, function(x) slot(x, "ntopratiotwoplus")))
  mindimensions <- unlist(lapply(preprocesseddatasets, function(x) slot(x, "mindimensions")))

  check <- all(c(variance, finite, completeobs, classbalance, ntopratiotwoplus, mindimensions))==TRUE

  if (check==TRUE) {exitstatus <- c("Exit status: OK: Stable computation of misclassification errors expected.")}
  if (check==FALSE) {exitstatus <- c("Exit status: Warning: Unstable computation of misclassification errors expected. See: yourgridclassobject@data")}

  return(exitstatus)

}

## CREATE GRID OF PREPROCESSING COMBINATIONS FROM FROM PHASES

creategrid <- function(phases){
  if(class(phases)!="list"){stop("Argument 'phases' to function 'creategrid' must of a list.")}
  grid <- expand.grid(lapply(phases, function(x) eval(as.name(x))@preprotransformations))
  colnames(grid) <- unlist(phases)
  return(grid)
}

## PREPROCESS DATA FOR A SINGLE SUBCLASS OBJECT

initializedataslot <- function(classname, dataobject){

  tryCatch({

  subclassobject <- new(classname)

  if (class(dataobject)=="DataClass") {transformeddata <- transformdata(subclassobject, dataobject)}

  if (class(dataobject)=="data.frame") {transformeddata <- transformdata(subclassobject, initializedataclassobject(dataobject))}

  if (is(dataobject, "PreprocessorClass")==TRUE) {transformeddata <- transformdata(subclassobject, dataobject@data)}

  subclassobject@data <- transformeddata

  return(subclassobject)

  }, error= function(e) return({subclassobject <- new(classname)})
  )

}

## PREPROCESS DATA FOR THE WHOLE GRID

executepreprocessing <- function(grid, dataobject){

  if(class(grid)!="data.frame"){stop("Argument 'grid' must of a data frame of a GridClass object.")}
  if(class(dataobject)!="DataClass"){stop("Argument 'dataobject' must of a DataClass object.")}

  out_preprocesseddatasets <- vector(mode="list", nrow(grid))
  firstcolumningrid <- 1

  for (rowingrid in 1:nrow(grid))
  {

    # PREPROCESS INPUT DATA BY THE FIRST COLUMN OF THE GRID

    out_preprocesseddatasets[[rowingrid]] <- initializedataslot(as.character(grid[rowingrid, firstcolumningrid]), dataobject) # first column of grid

    if (ncol(grid) > 1){

      # PREPROCESS CONSEQUENT COLUMNS

      for (columningrid in 2:ncol(grid))
      {
        out_preprocesseddatasets[[rowingrid]] <- initializedataslot(as.character(grid[rowingrid,columningrid]), out_preprocesseddatasets[[rowingrid]]@data)
      }

    }

  }

  # VALIDATE AND REPORT PREPROCESSED DATA MODEL FITTING STATUS
  out_preprocesseddatasets <- lapply(out_preprocesseddatasets, function(x) slot(x, "data"))
  out_preprocesseddatasets <- lapply(out_preprocesseddatasets, validatedata)
  print(reportexitstatus(out_preprocesseddatasets))
  return(out_preprocesseddatasets)
}


## GRID

#' container for preprocessor combinations and preprocessed data sets.
#'
#' Preprocessing techniques defined with setpreprocessor() can be combined to a phase.
#' Phases defined with setphase() can be combined to a grid of combinations with setgrid().
#' The main programmatic use with preprocomb() takes a GridClass object as argument.
#'
#' GridClass is also an interface for extending the system to package 'metaheur', which takes
#' a GridClass object to find near-optiomal combinations fast.
#'
#' @slot grid (data frame) preprocessor combinations
#' @slot data (list) DataClass objects
#' @slot validation (data frame) validation results
#' @export

setClass("GridClass", representation(grid="data.frame", data="list", validation="data.frame"))

#' constructor function for creating the combinations
#'
#' setgrid takes the preprocessing phases, which contain preprocessors and creates
#' the combinations of them as a grid. It then computes and stores the transformed
#' data sets for each combination. setgrid initializes a GridClass object.
#
#' @param phases (character) vector of phases
#' @param data (data frame)
#' @param diagnostics (logical) run testpreprocessor(), defaults to TRUE
#' @return a GridClass object
#' @examples
#' grid <- setgrid(phases=c("outliers", "selection"), data=iris)
#' @details If there are missing values, imputation phase must be set as first phase.
#' Default phase "sampling" can only be used with data, which has binary class labels.
#' @export

setgrid <- function(phases, data, diagnostics=TRUE){

# Validate arguments
if(class(phases)!="character"){stop("Argument 'phases' must be a character vector.")}
if(class(data)!="data.frame"){stop("Argument 'data' must of a data frame.")}

issamplingincluded <- "sampling" %in% phases

phases <- as.list(phases)
if(!all(lapply(phases, function(x) class(eval(as.name(x))))=="PhaseClass")){
stop("All elements in argument 'phases' must point to PhaseClass objects.")}

# Initialize objects

dataclassobject <- initializedataclassobject(data)

hasmorethantwolevels <- length(levels(dataclassobject@y)) > 2

if (issamplingincluded==TRUE & hasmorethantwolevels==TRUE) {stop("Default phase 'sampling' can only be used with data, which has binary class labels.")}

gridclassobject <- new("GridClass")

# Create grid

gridclassobject@grid <- creategrid(phases)

# Test preprocessors

if (diagnostics==TRUE){
print("Running diagnostics on single preprocessors:")
validation <- testpreprocessors(unique(unlist(gridclassobject@grid)))
}
print("Preprocessing data set by combinations:")

# Preprocess data

gridclassobject@data <- executepreprocessing(gridclassobject@grid, dataclassobject)

# Collect validation results on combinations
validationresults <- data.frame(gridclassobject@grid, data.frame(t(data.frame(lapply(gridclassobject@data, extract)))))
row.names(validationresults) <- NULL
gridclassobject@validation <- validationresults

return(gridclassobject)
}

Try the preprocomb package in your browser

Any scripts or data that you put into this service are public.

preprocomb documentation built on May 2, 2019, 3:23 a.m.