#' Tally missing metadata values per sample
#'
#' Counts the number of missing values (`NA`) in each sample (row) of a
#' metadata table and groups the samples by that count. This can be used to
#' check how sparse the metadata is. In human studies it is easy to end up
#' with sparse metadata, which inadvertently creates subsets of samples
#' based simply on the amount of available information.
#'
#' @param met_df metadata dataframe with samples in rows and
#'   metadata variables in columns
#'
#' @export
#'
#' @return list. The first item, `na_tally`, is a data frame with one row per
#'   distinct number of missing values: column `n_na` holds that number (as a
#'   factor) and column `Freq` the number of samples having it. The
#'   subsequent (unnamed) items are the subsets of `met_df` for each row of
#'   `na_tally`, in the same order, so item `i + 1` holds the samples
#'   described by row `i` of `na_tally`. If `met_df` has no rows, the list
#'   contains only `na_tally`.
#'
#' @examples
#' set.seed(1)
#' metadata_example <- data.frame(
#'   sampleID = LETTERS[1:10],
#'   group = c(rep(1:2, each = 3), rep(3, 4)),
#'   age = c(rnorm(6, 30, 5), rep(NA, 4)),
#'   sex = c(rep('F', 3), rep(NA, 4), rep('M', 3)),
#'   ethnicity = sample(c(NA,1,2,3), 10, replace = TRUE),
#'   medication = sample(c(NA,1,2), 10, replace=TRUE))
#'
#' met_sparse <- metadata_sparsity(metadata_example)
#' summary(met_sparse)
#' met_sparse$na_tally
#' met_sparse[[3]]
#' met_sparse[[4]]
metadata_sparsity <- function(met_df) {
  # Number of missing values per sample (row). Summing the logical NA mask
  # row-wise avoids apply()'s coercion of the whole table to a matrix.
  n_na <- rowSums(is.na(met_df))
  na_tally <- as.data.frame(table(n_na))

  # table() stores the distinct counts as factor levels; recover them as
  # numbers so the comparison below does not depend on factor coercion.
  na_counts <- as.numeric(as.character(na_tally$n_na))

  # One subset per distinct NA count, in the same order as the rows of
  # na_tally. Iterating over the counts themselves (rather than 1:nrow())
  # yields no subsets for empty input instead of clobbering na_tally.
  # drop = FALSE keeps single-column metadata as a data frame.
  subsets <- lapply(na_counts, function(k) {
    met_df[which(n_na == k), , drop = FALSE]
  })

  c(list(na_tally = na_tally), subsets)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.