R/get-balance-data.R

Defines functions balanced

Documented in balanced

#'   a general purpose function you can use to get balanced data.
#'
#'
#' @param data   the data.frame you wish to subset
#' @param ID     a character vector of column names that identify each "person" in the data set
#' @param TIME   a character vector specifying the different observation times for each ID
#' @param VARS   argument to specify which fields must be NA
#' @param required states whether each ID must have an observation for every TIME (default) or if you set it to "shared", it will only return the TIMES that all IDs have non-missing values for.
#' @examples
#'
#' unbal <- data.frame(PERSON=c(rep('Frank',5),rep('Tony',5),rep('Edward',5)), YEAR=c(2001,2002,2003,2004,2005,2001,2002,2003,2004,2005,2001,2002,2003,2004,2005), Y=c(21,22,23,24,25,5,6,NA,7,8,31,32,33,34,35), X=c(1:15))
#' balanced(unbal, "PERSON","YEAR")
#' balanced(unbal, "PERSON","YEAR", "X")
#' balanced(unbal, "PERSON","YEAR", required="shared")
#'
#' @name balanced
#' @rdname balanced
#' @export

balanced<-function(data, ID, TIME, VARS, required=c("all","shared")) {
    if(is.character(ID)) {
        ID <- match(ID, names(data))
    }
    if(is.character(TIME)) {
        TIME <- match(TIME, names(data))
    }
    if(missing(VARS)) {
        VARS <- setdiff(1:ncol(data), c(ID,TIME))
    } else if (is.character(VARS)) {
        VARS <- match(VARS, names(data))
    }
    required <- match.arg(required)
    idf <- do.call(interaction, c(data[, ID, drop=FALSE], drop=TRUE))
    timef <- do.call(interaction, c(data[, TIME, drop=FALSE], drop=TRUE))
    complete <- complete.cases(data[, VARS])
    tbl <- table(idf[complete], timef[complete])
    if (required=="all") {
        keep <- which(rowSums(tbl==1)==ncol(tbl))
        idx <- as.numeric(idf) %in% keep
    } else if (required=="shared") {
        keep <- which(colSums(tbl==1)==nrow(tbl))
        idx <- as.numeric(timef) %in% keep
    }
    data[idx, ]
}
ShouyeLiu/metaboliteUtility documentation built on May 6, 2019, 9:07 a.m.