#' a general purpose function you can use to get balanced data.
#'
#'
#' @param data the data.frame you wish to subset
#' @param ID a character vector of column names that identify each "person" in the data set
#' @param TIME a character vector specifying the different observation times for each ID
#' @param VARS argument to specify which fields must be NA
#' @param required states whether each ID must have an observation for every TIME (default) or if you set it to "shared", it will only return the TIMES that all IDs have non-missing values for.
#' @examples
#'
#' unbal <- data.frame(PERSON=c(rep('Frank',5),rep('Tony',5),rep('Edward',5)), YEAR=c(2001,2002,2003,2004,2005,2001,2002,2003,2004,2005,2001,2002,2003,2004,2005), Y=c(21,22,23,24,25,5,6,NA,7,8,31,32,33,34,35), X=c(1:15))
#' balanced(unbal, "PERSON","YEAR")
#' balanced(unbal, "PERSON","YEAR", "X")
#' balanced(unbal, "PERSON","YEAR", required="shared")
#'
#' @name balanced
#' @rdname balanced
#' @export
balanced<-function(data, ID, TIME, VARS, required=c("all","shared")) {
if(is.character(ID)) {
ID <- match(ID, names(data))
}
if(is.character(TIME)) {
TIME <- match(TIME, names(data))
}
if(missing(VARS)) {
VARS <- setdiff(1:ncol(data), c(ID,TIME))
} else if (is.character(VARS)) {
VARS <- match(VARS, names(data))
}
required <- match.arg(required)
idf <- do.call(interaction, c(data[, ID, drop=FALSE], drop=TRUE))
timef <- do.call(interaction, c(data[, TIME, drop=FALSE], drop=TRUE))
complete <- complete.cases(data[, VARS])
tbl <- table(idf[complete], timef[complete])
if (required=="all") {
keep <- which(rowSums(tbl==1)==ncol(tbl))
idx <- as.numeric(idf) %in% keep
} else if (required=="shared") {
keep <- which(colSums(tbl==1)==nrow(tbl))
idx <- as.numeric(timef) %in% keep
}
data[idx, ]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.