###############################
### Cleaning functions
#' Recode very small levels into Missing values
#'
#' Kill.the.small recodes all levels with proportions below a certain
#' threshold into a single new level ("Missing" by default).
#' This is useful for a very crude first look at a lot of variables.
#'
#' Proportions are computed with \code{prop.table(table(...))}, so \code{NA}
#' values do not count towards the total and unused levels have proportion 0
#' (and are therefore recoded whenever \code{threshold} is positive).
#' Columns that are not factors are returned unchanged.
#'
#' @param variables is a data.frame of factors
#' @param threshold is the proportion below which a level is recoded
#' @param new.level is the new label for the recoded levels
#' @return a data.frame with the same columns as \code{variables}, where the
#'   rare levels of each factor have been merged into \code{new.level}
#' @examples
#' x <- data.frame(a = factor(c(rep("common", 30), "rare")))
#' levels(kill.the.small(x)$a)
#' @export kill.the.small
kill.the.small <- function(variables, threshold = 0.05, new.level = "Missing") {
  # seq_len() yields an empty sequence for a data.frame without columns,
  # whereas 1:ncol(variables) would count down from 1 to 0 and fail.
  for (i in seq_len(ncol(variables))) {
    column <- variables[[i]]
    # Only factors have levels that can be merged.
    if (!is.factor(column)) next

    prop.var <- prop.table(table(column))
    kill.levels <- names(prop.var)[prop.var < threshold]

    # Assigning the same label to several levels merges them into one level.
    levels(column)[levels(column) %in% kill.levels] <- new.level
    variables[[i]] <- column
  }
  variables
}
#' NA to Missing
#'
#' Recode all NA values into missing in a data.frame of factors
#'
#' Every column that contains at least one \code{NA} gets
#' \code{missing.value} added to its levels (unless it is already a level)
#' and all its \code{NA} values replaced by \code{missing.value}. Columns
#' without \code{NA} values are returned unchanged.
#'
#' @param x is a data.frame of factors
#' @param convert.to.factor if TRUE all non-factor columns in x are converted to factors
#' @param missing.value is the new value given to the NA values
#' @return a data.frame with the same columns as \code{x} and no \code{NA}
#'   values in its factor columns
#' @export
#' @examples
#'
#' x <- as.data.frame(matrix(rep(c("a", "b", "c", NA), 3), ncol = 3),
#'                    stringsAsFactors = TRUE)
#' x
#' na.to.missing(x)
na.to.missing <- function(x, convert.to.factor = FALSE, missing.value = "MISSING") {
  col.factors <- vapply(x, is.factor, logical(1))

  # Handling if values are not factors
  if (identical(convert.to.factor, FALSE) && !all(col.factors)) {
    stop("Not all columns in x are factors")
  }
  if (identical(convert.to.factor, TRUE) && !all(col.factors)) {
    message("Some columns were forced into factors")
    # Convert only the columns that are not factors yet. lapply() keeps each
    # column's own type; apply() would go through a character matrix.
    x[!col.factors] <- lapply(x[!col.factors], as.factor)
  }

  # Iterate directly over the columns with NA values: this also covers the
  # cases where none or exactly one column has NA values.
  missing.cols <- which(colSums(is.na(x)) > 0)
  for (i in missing.cols) {
    column <- x[[i]]
    if (is.factor(column)) {
      # union() avoids a duplicated level when missing.value already exists.
      levels(column) <- union(levels(column), missing.value)
    }
    column[is.na(column)] <- missing.value
    x[[i]] <- column
  }
  x
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.