##
## up + constant: to check whether something is constant
##
#' Generic test for constant
#'
#' @param x
#' @param na.rm (default FALSE)
#' @param \dots
#' @export
constant <- function(x,...) UseMethod("constant")
#' @describeIn constant
#' @export
constant.default <- function(x, na.rm = FALSE,...) {
if (na.rm) x <- na.omit(x)
length(unique(x)) <= 1
}
#' @param id
#' @param all
#' @param \dots
#' @describeIn constant
#' @export
constant.data.frame <- function( x, id , all = FALSE , ...) {
## Description: (G. Monette, June 13, 2005)
## check if values of variables are constant within levels of id
## if no id, check each variable
## if all == TRUE, report overall instead of within levels of id
## Possible improvements:
## allow nested formulas: ~id1/id2 for id and report level of
## each variable
## [see varLevel()]
## note that the following code allows id to be given as a name or as
## a formula
if (missing(id)) ret <- sapply(x, constant,...)
else {
id <- eval(substitute(id), x, parent.frame())
if( inherits(id,"formula") ) id <- c( model.frame(id,x) )
ret <- sapply(x, function(xx) tapply(xx, id, constant, ...))
if ( all ) ret <- apply( ret, 2, all)
}
ret
}
###
### varLevel
###
#' Identify level of aggregation at which a variable in invariant
#'
#' Shows levels of each variable with respect to grouping formula
#' of form ~id or nested ids ~id1/id2
#'
#' @return Level 0 is a constant for the whole data frame,
#' Level <= 1 implies the variable is constant within levels of id1,
#' Level <= 2 implies the variable is constant within levels of id2,
#' ... etc.
#'
#' NOTE: NA counts as a distinct value
#' @param x
#' @param form formula to identify clusters
#' @param \dots extra arguments to \code{\link{constant}} function
#' @export
varLevel <- function(x, form, ...) {
## Description: (G. Monette, June 13, 2005)
sel <- model.frame( form, x )
z <- list()
idx <- rep('',nrow(x))
z[[1]] <- constant(x,...)
for ( ii in 1:ncol(sel)) {
idx <- paste(idx, as.character(sel[[ii]]), sep = ";")
z[[ii+1]] <- constant( x, idx, all = TRUE,...)
}
# print(z)
ret <- do.call("rbind", z)
# print(ret)
ret <- length(z) - apply( ret*1, 2 , sum)
ret
}
###
### up
###
#' Create a data frame at a higher level of aggregation
#'
#' Produce a higher level data set with one row per cluster. The data set can
#' contain only variables that are invariant in each cluster or it can also
#' include summaries (mean or modes) of variables that vary by cluster. Adapted
#' from \code{gsummary} in the \code{nlme} package.
#'
#' \code{up} was created from \code{nlme::gsummary} and modified to make it
#' easier to use and to make an equivalent of \code{gsummary} available when
#' using \code{lme4}.
#'
#' @param object a data frame to be summarized.
#' @param form a one-sided formula identifying the variable(s) in \code{object}
#' that identifies clusters. e.g. ~ school/Sex to get a summary within each Sex
#' of each school.
#' @param all if TRUE, include summaries of variables that vary within
#' clusters, otherwise keep only cluster-invariant variables.
#' @param sep separator to form cluster names combining more than one
#' clustering variables. If the separator leads to the same name for distinct
#' clusters (e.g. if var1 has levels 'a' and 'a/b' and var2 has levels 'b/c'
#' and 'c') the function produces an error and a different separator should be
#' used.
#' @param FUN function to be used for summaries.
#' @param omitGroupingFactor kept for compatibility with \code{gsummary}
#' @param groups kept for compatibility with \code{gsummary}
#' @param invariantsOnly kept for compatibility with \code{gsummary}
#' @param \dots additional arguments to \code{tapply} when summarizing
#' numerical variables. e.g. \code{na.rm = TRUE}
#' @return a data frame with one row per value of the variable in \code{form}
#' @examples
#' data(hs)
#' dim( hs )
#' hsu <- up( hs, ~ school )
#' dim( hsu )
#'
#' # to also get cluster means of cluster-varying numeric variables and modes of factors:
#'
#' hsa <- up( hs, ~ school , all = TRUE )
#'
#' # to get summary proportions of cluster varying factors:
#'
#' up( cbind( hs, model.matrix( ~ Sex -1 , hs)), ~ school, all = T)
#'
#'
#' ## To plot a summary between-cluster panel along with within-cluster panels:
#'
#' hsu <- up( hs, ~ school, all = TRUE)
#' hsu$school <- ' between' # space to make it come lexicographically before cluster names
#'
#' require( lattice )
#' xyplot( mathach ~ ses | school, rbind(hs,hsu),
#' panel = function( x, y, ...) {
#' panel.xyplot( x, y, ...)
#' panel.lmline( x, y, ...)
#' } )
#'
#' @author largely from gsummary in Bates & Pinheiro
#' @export
up <- function ( object, form = formula(object),
all = FALSE, sep = "/",
FUN = function(x) mean(x, na.rm = TRUE),
omitGroupingFactor = FALSE,
groups, invariantsOnly = !all , ...)
{
if (!inherits(object, "data.frame")) {
stop("Object must inherit from data.frame")
}
sel.mf <- model.frame( form , object , na.action = na.include )
narows <- apply(sel.mf,1,function(x) any(is.na(x)))
if(any(narows)) {
warning("Rows with NAs in grouping variable(s) are omitted")
sel.mf <- droplevels(sel.mf[!narows,,drop=FALSE])
object <- object[!narows,,drop=FALSE]
}
if ( ncol(sel.mf) > 1) {
sel <- apply( sel.mf, 1 , paste, collapse = sep)
groups <- as.factor(sel)
# Check if sep works to create unique group combinations
sel2 <- apply(sel.mf,1, paste, collapse = as.character(sample(1000:9999,1)))
if ( length( unique(sel)) != length( unique(sel2))) {
stop( 'distinct grouping combinations have the same name: change the "sep" argument')
}
} else {
groups <- as.factor(sel.mf[[1]])
}
gunique <- unique(groups)
firstInGroup <- match(gunique, groups)
asFirst <- firstInGroup[match(groups, gunique)]
value <- as.data.frame(object[firstInGroup, , drop = FALSE])
row.names(value) <- as.character(gunique)
value <- value[as.character(sort(gunique)), , drop = FALSE]
varying <- unlist(lapply(object, function(column, frst) {
aux <- column
if( is.matrix( aux))aux[] <- as.character( aux )
else aux <- as.character(aux)
if ( is.matrix( aux )) any(!identical( aux, aux[frst,]))
else any(!identical(aux, aux[frst]))
}, frst = asFirst))
if (any(varying) && (!invariantsOnly)) {
Mode <- function(x) {
aux <- table(x)
names(aux)[match(max(aux), aux)]
}
if (data.class(FUN) == "function") {
FUN <- list(numeric = FUN, ordered = Mode, factor = Mode, logical = FUN)
}
else {
if (!(is.list(FUN) && all(sapply(FUN, data.class) ==
"function"))) {
stop("FUN can only be a function or a list of functions")
}
auxFUN <- list(numeric = mean, ordered = Mode, factor = Mode, logical = mean)
aux <- names(auxFUN)[is.na(match(names(auxFUN), names(FUN)))]
if (length(aux) > 0)
FUN[aux] <- auxFUN[aux]
}
for (nm in names(object)[varying]) {
dClass <- if (is.ordered(object[[nm]]))
"ordered"
else if (is.factor(object[[nm]]))
"factor"
else mode(object[[nm]])
if (dClass == "numeric") {
if( is.matrix ( object[[nm]])){
zmat <- object[[nm]]
ret <- list()
for ( jj in seq_len(ncol(zmat))) {
ret[[jj]] <- as.vector( tapply( zmat[,jj],
groups, FUN[['numeric']],...))
}
value[[nm]] <- do.call(cbind, ret)
} else {
value[[nm]] <- as.vector(tapply(object[[nm]],
groups, FUN[["numeric"]], ...))
}
}
else {
value[[nm]] <- as.vector(tapply(object[[nm]],
groups, FUN[[dClass]]))
}
}
}
else {
value <- value[, !varying, drop = FALSE]
}
if (omitGroupingFactor) {
if (is.null(form)) {
stop("Cannot omit grouping factor without \"form\"")
}
grpForm <- getGroupsFormula(form, asList = TRUE)
if (missing(level))
level <- length(grpForm)
grpNames <- names(grpForm)[level]
whichKeep <- is.na(match(names(value), grpNames))
if (any(whichKeep)) {
value <- value[, whichKeep, drop = FALSE]
}
else {
return(NULL)
}
}
value
}
#' na.include action
#'
#' From the Hmisc package, author: Frank Harrell
#'
#' @param obj a data.frame whose factors will be redefined so exclude = NULL
#' @export
na.include <-
function (obj)
{
if (inherits(obj, "data.frame"))
for (i in seq(along = obj)) obj[[i]] <- na.include(obj[[i]])
else {
if (length(levels(obj)) && any(is.na(obj)))
obj <- factor(obj, exclude = NULL)
}
obj
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.