R/analysis.R

#' @include generics.R
#' @title Class, methods, and functions for analysis objects
#'
#' @description
#'
#' The`...` arguments for the `randomForest` S4 method are passed directly to `randomForest::randomForest`.
#'
#' @author Brandon McNellis
#'
#' @name analysis
#' @rdname analysis
NULL
#'
#' An S4 class for analysis results
#'
#' @slot type Type of analysis, set after populating object
#' @slot response Character vector of response variable names
#' @slot explanatory Character vector of explanatory variable names
#'
#' @rdname analysis
analysis <- setClass(
  'analysis',
  slots = list(
    type = 'character',
    sample = 'integer',
    train_data = 'data.frame',
    input_data = 'data.frame',
    output_data = 'data.frame',
    response = 'character',
    explanatory = 'character',
    models = 'list',
    r2 = 'numeric'
  )
)
#' @export
setValidity('analysis', function(object) {
  errors <- character()

  # train_data
  r <- object@response
  e <- object@explanatory
  tdt <- object@train_data
  if (all(length(r) > 0, length(e) > 0)) {
    if (!all(r %in% colnames(tdt))) {
      msg <- paste0('response variables missing from training data')
      errors <- c(errors, msg)
    }
    if (!all(e %in% colnames(tdt))) {
      msg <- paste0('explanatory variables missing from training data')
      errors <- c(errors, msg)
    }
  }

  # returns
  if (length(errors) == 0) {
    TRUE
  } else {
    errors
  }
})
#' @rdname analysis
#' @export
setMethod('initialize',
          signature(.Object = 'analysis'),
          function (.Object, ...) {
            params <- list(...)

            # returns
            mt <- validObject(.Object)
            if (isTRUE(mt)) {
              return(.Object)
            } else {
              return(mt)
            }
          }
)
#' @rdname analysis
#' @export
setMethod('randomForest',
          signature(object = 'analysis'),
          function (object, ...) {

            if (length(object@type) > 0) {
              stop('analysis already done?')
            }
            object <- PrepInput(object)

            # wrapper for randomForest
            df0 <- object@train_data
            ys <- object@response
            xs <- object@explanatory
            sample <- object@sample
            models <- vector('list', length(ys))
            y_new <- object@response

            in_df <- df0[, xs]
            r2 <- vector('numeric', length(ys))

            for (i in seq_along(ys)) {
              ii <- ys[i]
              if (length(unique(df0[, ii])) < 6) {
                next
              }
              cat('\n', format(i / length(ys) * 100, digits = 0, nsmall = 2), ' variable:', ii)
              irf <- randomForest::randomForest(x = in_df, y = df0[, ii], do.trace = F, ...)
              #irf <- callNextMethod(data = in_df, y = df0[, ii], do.trace = T, ...)
              r2i <- irf$rsq[length(irf$rsq)]
              if (r2i < 0.1) {
                y_new <- y_new[-which(y_new == ii)]
                models[[i]] <- NULL
              } else {
                r2[[i]] <- r2i
                models[[i]] <- irf
              }
            }

            object@response <- y_new
            object@models <- models
            object@type <- 'randomForest'
            return(object)
          }
)
#' @rdname analysis
#' @export
setMethod("lm",
          signature(data = "analysis"),
          function (formula = NULL, data, subset, weights, na.action, method = "qr",
                    model = TRUE, x = FALSE, y = FALSE, qr = TRUE, singular.ok = TRUE,
                    contrasts = NULL, offset, ...) {

            validObject(data)
            if (length(formula) > 0) {
              message('Ignoring input formula')
            }

            df0 <- data@train_data
            df0e <- df0[, which(colnames(df0) %in% data@explanatory)]
            data@r2 <- vector('numeric', length(data@response))
            data@models <- vector('list', length(data@response))

            for (i in seq_along(data@response)) {
              ii <- data@response[i]
              en <- paste(data@explanatory, collapse = ' + ')
              fn <- paste(ii, '~', en, sep = ' ')
              mod0 <- do.call(lm, args = list(formula = as.formula(fn), data = df0))
              browser()
              mod0 <- MASS::stepAIC(mod0, trace = 0)
              cat('\nresponse:', ii)
              cat('\nexplanatory after stepAIC:')
              print(names(test$coefficients)[-1])
              r2i <- summary(mod0)$adj.r.squared
              if (r2i < 0.1) {
                data@models[[i]] <- NULL
                data@r2[[i]] <- summary(mod0)$adj.r.squared
              } else {
                data@models[[i]] <- mod0
                data@r2[[i]] <- summary(mod0)$adj.r.squared
              }

            }

            data@type <- 'lm'
            return(data)
          }
)
#' @rdname analysis
#' @export
setMethod('PrepInput',
          signature(object = 'analysis'),
          function (object, ...) {
            params <- list(...)

            # prep training data
            stopifnot(validObject(object))
            df0 <- object@train_data
            nr <- nrow(df0)
            r <- object@response
            e <- object@explanatory
            re_indx <- which(colnames(df0) %in% c(r, e))

            df0[, re_indx] <- data.frame(lapply(df0[, re_indx], function(x) {
              y <- if (is.character(x)) as.factor(x) else x
              z <- ifelse(is.infinite(y), NA, y)
              z
            }))

            df0_dp <- vector('logical', nrow(df0))
            for (i in seq(nrow(df0))) {
              df0_dp[i] <- all(!is.na(df0[i, ]))
            }
            df0 <- df0[df0_dp, ]
            object@sample <- object@sample[df0_dp]

            # return
            if (ncol(df0) < 1) {
              stop('no data left after refining')
            } else {
              cat('dropped ', nr - nrow(df0), 'rows.\n')
            }

            if (length(params) > 0) {
              message('ignored passed parameters:')
              cat('\n', sapply(params, function(x) paste0(x, '\n')), '\n')
            }

            object@train_data <- df0
            validObject(object)
            return(object)
          }
)
#' @rdname analysis
#' @export
setMethod("print",
          signature(x = "analysis"),
          function (x, ...) {
            #mls <- x@models
            if (length(unlist(x@models)) > 0) {
              for (i in seq_along(x@models)) {
                ii <- x@models[[i]]
                callNextMethod(x = ii, ...)
              }
            } else {
              str(x, max.level = 2)
              invisible()
            }
          }
)
#' @export
CheckParams <- function(x) {
  stopifnot(is.list(x), length(x) > 0)
  if ('incl_cutoff' %in% names(x)) {
    stopifnot(!is.numeric(x$incl_cutoff), x$incl_cutoff < 0, x$incl_cutoff > 1)
  }
  if ('max_lvl' %in% names(x)) {
    stopifnot(x$max_lvl < 0, !is.integer(x$max_lvl))
  }
  if ('complete_cases' %in% names(x)) {
    stopifnot(is.logical(x), length(x) == 1)
  }
  invisible()
}
#' @rdname analysis
#' @export
DropModels <- function(object) {
  validObject(object)
  stopifnot(inherits(object, 'analysis'))

  if (length(object@models) == 0) {
    stop('no models, populate object first')
  }

  wm <- sapply(object@models, is.null)
  if (sum(wm) == 0) {
    stop('no null models')
  }

  object@models <- object@models[!wm]
  object@r2 <- object@r2[!wm]
  object@response <- object@response[!wm]

  return(object)
}
bmcnellis/RSFIA documentation built on June 1, 2019, 7:40 a.m.