#' @title Basic info on each col of data.frame - testing faster way, but returns text
#'
#' @description
#' Returns basic information on each field in a data.frame, like count of rows that are zero, negative,
#' NA, infinite, etc.
#'
#' Slow - work in progress
#' Leaves out logical, complex?, character, etc. cols
#' this version fails to handle fields that are factor class!?
#' @param df Matrix or data.frame to examine. Cannot be a single vector currently.
#' @return Returns a vector of results, one per col of df
#' @template nachecks
#' @export
na.check2 = function(df) {
cols=names(df)
#df <- as.matrix(df)
myfun <- function(x) {
c(
length(x),
sum(!is.na(x)),
sum(is.na(x)),
round(100 * sum(!is.na(x)) / length(x), 1),
sum(x==0, na.rm=TRUE),
sum(x < 0, na.rm=TRUE),
sum(is.infinite(x)),
sum(x=='', na.rm=TRUE),
sum(x!='', na.rm=TRUE),
round(100 * sum(x!='', na.rm=TRUE) / length(x), 1),
sum(!is.na(unique(x))),
ifelse(all(x==0) | all(is.na(x)), NA, min(x[!is.na(x) & x!=0]))
)
}
# FASTER THAN na.check(), but returns character fields
results <- matrix( sapply(df[ , cols], FUN=myfun), nrow=length(cols), byrow=TRUE)
rownames(results) <- cols
colnames(results) <- c(
'bcount',
'not.na',
'na',
'pct.not.na',
'zero',
'neg',
'inf',
'blank',
'not.blank.not.na',
'pct.nbna',
'unique.not.na',
'min.nonzero')
return(results)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.