#' @include generics.R
#' @title Class, methods, and functions for analysis objects
#'
#' @description
#'
#' The`...` arguments for the `randomForest` S4 method are passed directly to `randomForest::randomForest`.
#'
#' @author Brandon McNellis
#'
#' @name analysis
#' @rdname analysis
NULL
#'
#' An S4 class for analysis results
#'
#' @slot type Type of analysis, set after populating object
#' @slot response Character vector of response variable names
#' @slot explanatory Character vector of explanatory variable names
#'
#' @rdname analysis
analysis <- setClass(
'analysis',
slots = list(
type = 'character',
sample = 'integer',
train_data = 'data.frame',
input_data = 'data.frame',
output_data = 'data.frame',
response = 'character',
explanatory = 'character',
models = 'list',
r2 = 'numeric'
)
)
#' @export
setValidity('analysis', function(object) {
errors <- character()
# train_data
r <- object@response
e <- object@explanatory
tdt <- object@train_data
if (all(length(r) > 0, length(e) > 0)) {
if (!all(r %in% colnames(tdt))) {
msg <- paste0('response variables missing from training data')
errors <- c(errors, msg)
}
if (!all(e %in% colnames(tdt))) {
msg <- paste0('explanatory variables missing from training data')
errors <- c(errors, msg)
}
}
# returns
if (length(errors) == 0) {
TRUE
} else {
errors
}
})
#' @rdname analysis
#' @export
setMethod('initialize',
signature(.Object = 'analysis'),
function (.Object, ...) {
params <- list(...)
# returns
mt <- validObject(.Object)
if (isTRUE(mt)) {
return(.Object)
} else {
return(mt)
}
}
)
#' @rdname analysis
#' @export
setMethod('randomForest',
signature(object = 'analysis'),
function (object, ...) {
if (length(object@type) > 0) {
stop('analysis already done?')
}
object <- PrepInput(object)
# wrapper for randomForest
df0 <- object@train_data
ys <- object@response
xs <- object@explanatory
sample <- object@sample
models <- vector('list', length(ys))
y_new <- object@response
in_df <- df0[, xs]
r2 <- vector('numeric', length(ys))
for (i in seq_along(ys)) {
ii <- ys[i]
if (length(unique(df0[, ii])) < 6) {
next
}
cat('\n', format(i / length(ys) * 100, digits = 0, nsmall = 2), ' variable:', ii)
irf <- randomForest::randomForest(x = in_df, y = df0[, ii], do.trace = F, ...)
#irf <- callNextMethod(data = in_df, y = df0[, ii], do.trace = T, ...)
r2i <- irf$rsq[length(irf$rsq)]
if (r2i < 0.1) {
y_new <- y_new[-which(y_new == ii)]
models[[i]] <- NULL
} else {
r2[[i]] <- r2i
models[[i]] <- irf
}
}
object@response <- y_new
object@models <- models
object@type <- 'randomForest'
return(object)
}
)
#' @rdname analysis
#' @export
setMethod("lm",
signature(data = "analysis"),
function (formula = NULL, data, subset, weights, na.action, method = "qr",
model = TRUE, x = FALSE, y = FALSE, qr = TRUE, singular.ok = TRUE,
contrasts = NULL, offset, ...) {
validObject(data)
if (length(formula) > 0) {
message('Ignoring input formula')
}
df0 <- data@train_data
df0e <- df0[, which(colnames(df0) %in% data@explanatory)]
data@r2 <- vector('numeric', length(data@response))
data@models <- vector('list', length(data@response))
for (i in seq_along(data@response)) {
ii <- data@response[i]
en <- paste(data@explanatory, collapse = ' + ')
fn <- paste(ii, '~', en, sep = ' ')
mod0 <- do.call(lm, args = list(formula = as.formula(fn), data = df0))
browser()
mod0 <- MASS::stepAIC(mod0, trace = 0)
cat('\nresponse:', ii)
cat('\nexplanatory after stepAIC:')
print(names(test$coefficients)[-1])
r2i <- summary(mod0)$adj.r.squared
if (r2i < 0.1) {
data@models[[i]] <- NULL
data@r2[[i]] <- summary(mod0)$adj.r.squared
} else {
data@models[[i]] <- mod0
data@r2[[i]] <- summary(mod0)$adj.r.squared
}
}
data@type <- 'lm'
return(data)
}
)
#' @rdname analysis
#' @export
setMethod('PrepInput',
signature(object = 'analysis'),
function (object, ...) {
params <- list(...)
# prep training data
stopifnot(validObject(object))
df0 <- object@train_data
nr <- nrow(df0)
r <- object@response
e <- object@explanatory
re_indx <- which(colnames(df0) %in% c(r, e))
df0[, re_indx] <- data.frame(lapply(df0[, re_indx], function(x) {
y <- if (is.character(x)) as.factor(x) else x
z <- ifelse(is.infinite(y), NA, y)
z
}))
df0_dp <- vector('logical', nrow(df0))
for (i in seq(nrow(df0))) {
df0_dp[i] <- all(!is.na(df0[i, ]))
}
df0 <- df0[df0_dp, ]
object@sample <- object@sample[df0_dp]
# return
if (ncol(df0) < 1) {
stop('no data left after refining')
} else {
cat('dropped ', nr - nrow(df0), 'rows.\n')
}
if (length(params) > 0) {
message('ignored passed parameters:')
cat('\n', sapply(params, function(x) paste0(x, '\n')), '\n')
}
object@train_data <- df0
validObject(object)
return(object)
}
)
#' @rdname analysis
#' @export
setMethod("print",
signature(x = "analysis"),
function (x, ...) {
#mls <- x@models
if (length(unlist(x@models)) > 0) {
for (i in seq_along(x@models)) {
ii <- x@models[[i]]
callNextMethod(x = ii, ...)
}
} else {
str(x, max.level = 2)
invisible()
}
}
)
#' @export
CheckParams <- function(x) {
stopifnot(is.list(x), length(x) > 0)
if ('incl_cutoff' %in% names(x)) {
stopifnot(!is.numeric(x$incl_cutoff), x$incl_cutoff < 0, x$incl_cutoff > 1)
}
if ('max_lvl' %in% names(x)) {
stopifnot(x$max_lvl < 0, !is.integer(x$max_lvl))
}
if ('complete_cases' %in% names(x)) {
stopifnot(is.logical(x), length(x) == 1)
}
invisible()
}
#' @rdname analysis
#' @export
DropModels <- function(object) {
validObject(object)
stopifnot(inherits(object, 'analysis'))
if (length(object@models) == 0) {
stop('no models, populate object first')
}
wm <- sapply(object@models, is.null)
if (sum(wm) == 0) {
stop('no null models')
}
object@models <- object@models[!wm]
object@r2 <- object@r2[!wm]
object@response <- object@response[!wm]
return(object)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.