#' @title Clean up / prep a data frame for use in statistical model functions
#' @description Subsets a data frame to the requested columns and then
#' optionally keeps only numeric columns, drops columns with too many missing
#' values, drops incomplete rows, converts character columns to factors and
#' drops factors with too many levels. The number of rows and columns dropped
#' is printed with `cat()`.
#'
#' @param df Input data frame.
#' @param incl Character vector of column names to include. Names that are
#'   not columns of `df` are ignored; columns keep the order they have in `df`.
#' @param cc Only include complete cases (rows without any `NA`)?
#' @param char_to_fac Convert all character columns to factors?
#' @param max_lvl Max # of factor levels. Factors with more than `max_lvl`
#'   levels are dropped. Useful for RF analysis.
#' @param num_only Exclude all non-numeric variables?
#' @param vct Fractional threshold of non-NA values required to include a
#'   variable. A column is kept only when its fraction of `NA` values is
#'   strictly less than `1 - vct`.
#'
#' @details
#'
#' The options are applied in this order: `incl`, `num_only`, `vct`, `cc`,
#' `char_to_fac`, `max_lvl`.
#'
#' `vct` executes before `cc`, allowing you to drop mostly empty columns before
#' dropping rows with incomplete data.
#'
#' An error is raised when `df` is not a data frame, when fewer than two
#' columns remain after applying `incl`, or when every column has been dropped
#' by the end.
#'
#' @return A data frame holding the rows and columns that survived all of the
#'   requested steps. Column names and row names of surviving data are kept
#'   as they are in `df`.
#'
#' @export
#' @examples
#' df <- data.frame(
#'   x = c(1, 2, NA, 4),
#'   y = c('a', 'b', 'c', 'd'),
#'   z = c(NA, NA, NA, 1)
#' )
#' deprecated_PrepDataForModels(df, cc = TRUE, vct = 0.5)
deprecated_PrepDataForModels <- function(df, incl = colnames(df), cc = FALSE,
                                         char_to_fac = TRUE, max_lvl = 53,
                                         num_only = FALSE, vct = 0.9) {
  # Setup:
  if (!is.data.frame(df)) stop('Input must be a data frame')

  # Execute options:
  # Every subset below uses `drop = FALSE`; without it a single surviving
  # column collapses to a bare vector and the later steps fail.
  out_df <- df[, colnames(df) %in% incl, drop = FALSE]
  if (ncol(out_df) < 2) stop('Error subsetting columns - named correctly?')

  if (num_only) {
    # One flag per column. class() can return several entries for one column
    # (e.g. ordered factors), which would misalign a class-based lookup.
    is_num <- vapply(out_df, is.numeric, logical(1))
    out_df <- out_df[, is_num, drop = FALSE]
  }

  # Keep columns whose NA fraction is strictly below 1 - vct.
  vct_incl <- vapply(out_df, function(x) {
    (sum(is.na(x)) / length(x)) < (1 - vct)
  }, logical(1))
  out_df <- out_df[, vct_incl, drop = FALSE]

  if (cc) {
    has_na <- rowSums(is.na(out_df)) > 0
    out_df <- out_df[!has_na, , drop = FALSE]
  }

  if (char_to_fac) {
    # Replace columns in place rather than rebuilding with data.frame(), so
    # column names are not altered by check.names and row names are kept.
    is_chr <- vapply(out_df, is.character, logical(1))
    if (any(is_chr)) {
      out_df[is_chr] <- lapply(out_df[is_chr], as.factor)
    }
  }

  # Applies to every factor column, whether or not it was converted above.
  too_many_lvls <- vapply(out_df, function(x) {
    is.factor(x) && nlevels(x) > max_lvl
  }, logical(1))
  out_df <- out_df[, !too_many_lvls, drop = FALSE]

  # Return:
  if (ncol(out_df) < 1) stop('Dropped all columns')
  cat('Rows dropped:', nrow(df) - nrow(out_df),
      '\nCols dropped:', ncol(df) - ncol(out_df), '\n')
  out_df
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.