Nothing
# best explanation!!!
# ~ 1 | c1/c2
# https://stats.stackexchange.com/questions/228800/crossed-vs-nested-random-effects-how-do-they-differ-and-how-are-they-specified
#' @title Cluster-Specific Sample Quantiles
#'
#' @description
#' Sample \link[stats]{quantile}s in each cluster of observations.
#'
#' @param formula \link[stats]{formula},
#' including response \eqn{y}, cluster(s) \eqn{c}'s,
#' cluster-specific covariate(s) \eqn{x}'s to be retained, and
#' cluster-specific covariate(s) \eqn{z}'s to be removed
#' from `data`, e.g.,
#' \describe{
#' \item{`y ~ 1 | c1`}{cluster \eqn{c_1}, without cluster-specific covariate}
#' \item{`y ~ 1 | c1/c2`}{cluster \eqn{c_1}, and cluster \eqn{c_2} nested in \eqn{c_1}, without cluster-specific covariate}
#' \item{`y ~ x1 + x2 | c1`}{cluster \eqn{c_1}, and cluster-specific covariates \eqn{x_1} and \eqn{x_2}}
# \item{`y ~ x1 + x2 | c1/c2`}{cluster \eqn{c_1}, cluster \eqn{c_2} nested in \eqn{c_1}, and cluster (\eqn{c_2} at least) specific covariates \eqn{x_1} and \eqn{x_2}}
#' \item{`y ~ . | c1`}{cluster \eqn{c_1}, and all (supposedly cluster-specific) covariates from `data`}
# \item{`y ~ . | c1/c2`}{cluster \eqn{c_1}, cluster \eqn{c_2} nested in \eqn{c_1}, and all (supposedly cluster-specific) covariates from `data`}
#' \item{`y ~ . - z1 - z2 | c1`}{cluster \eqn{c_1}, and all (supposedly cluster-specific) covariates, except for \eqn{z_1} and \eqn{z_2}, from `data`}
#' }
#'
#' @param data \link[base]{data.frame}
#'
# @param FUN \link[base]{character} scalar, name of
# \link[base]{function} for summary statistics,
# currently only supports \link[stats]{quantile} (`'quantile'` default)
# \link[np]{npquantile} (`'npquantile'`), or
# using \link[np]{npquantile} only if sample size is less or equal to 100 (`'npquantile100'`).
#'
#' @param f_sum_ \link[base]{function} to summarize the sample \link[stats]{quantile}s from
#' lower-level cluster \eqn{c_2} (if present),
#' such as \link[base]{mean.default} (default), \link[stats]{median.default}, \link[base]{max}, \link[base]{min}, etc.
#'
#' @param probs \link[base]{double} \link[base]{vector},
#' probabilities \eqn{\mathbf{p} = (p_1,\cdots,p_N)'}
#' shared across all clusters,
#' where the cluster-specific sample \link[stats]{quantile}s
# \eqn{\mathbf{q} = (q_1,\cdots,q_N)'}
#' of response \eqn{y} are calculated.
#' Default `seq(.01, .99, by = .01)`
#'
# @param type \link[base]{integer} scalar, see argument `type` of function \link[stats]{quantile}
#'
#' @param ... additional parameters of function \link[stats]{quantile}
#'
# @details
# Function [clusterQp] calculates \eqn{N} sample \link[stats]{quantile}s
# in each cluster of observations.
#'
#' @returns
#' Function [clusterQp] returns an \link[stats]{aggregate}d \link[base]{data.frame},
#' in which
#'
#' \itemize{
#'
#' \item {the highest cluster \eqn{c_1} and cluster-specific covariate(s) \eqn{x}'s
#' are retained.
#' \itemize{
#' \item {If the input `formula` takes form of `y ~ . | c1` or `y ~ . - z1 | c1`,
#' then all covariates (except for \eqn{z_1}) are considered cluster-specific;}
#' \item {Sample quantiles from lower-level clusters (e.g., \eqn{c_2}) are point-wise summarized using function `f_sum_`.}
#' }
#' }
#'
#' \item {response \eqn{y} is removed; instead, a \link[base]{double} \link[base]{matrix} of \eqn{N} columns stores
#' the cluster-specific sample \link[stats]{quantile}s.
# \eqn{\mathbf{q}} of the response \eqn{y}.
#' This \link[base]{matrix}
#' \itemize{
#' \item {is named after the \link[base]{parse}d \link[base]{expression} of response \eqn{y} in `formula`;}
#' \item {\link[base]{colnames} are the probabilities \eqn{\mathbf{p}}, for the ease of subsequent programming.}
#' }
#' }
#'
#' }
#'
#'
#' @examples
#' # see ?`Qindex-package` for examples
# @importFrom np npquantile
# @keywords internal
#' @importFrom stats aggregate quantile update.formula terms.formula
#' @export
clusterQp <- function(
formula, data,
# FUN = c('npquantile100', 'quantile', 'npquantile'),
f_sum_ = mean.default, # max, min, etc.
probs = seq.int(from = .01, to = .99, by = .01),
#type = 7,
...
) {
# if (anyNA(data)) # okay to have NA in `data`
if (!is.symbol(y <- formula[[2L]])) stop('Variable to be aggregated ', sQuote(deparse1(y)), ' must be `symbol`')
if (formula[[3L]][[1L]] != '|') stop('`formula` must have format of y ~ x1 | c1 or y ~ x1 | c1/c2')
# dealing with cluster(s)
cls <- formula[[3L]][[3L]]
# dealing with `x`
fomx_new <- Reduce(
f = function(e1, e2) call('-', e1, e2),
init = quote(.),
x = lapply(c(as.character(y), all.vars(cls)), FUN = as.symbol)
)
fomx <- update.formula(
old = terms.formula(call('~', formula[[3L]][[2L]]), data = data),
new = call('~', fomx_new))
#probs <- seq.int(from = from, to = to, by = by)
if (is.symbol(cls)) { # 1-level clustering
if (is.factor(data[[cls]])) data[[cls]] <- factor(data[[cls]]) # remove empty levels
aggr_fom <- eval(call(name = '~', y,
call('+', fomx[[2L]], cls)))
# ?stats:::aggregate.formula
ret <- aggregate(aggr_fom, data = data, FUN = quantile, probs = probs, ..., names = FALSE)
dimnames(ret[[y]]) <- list(NULL, probs)
} else {
if (cls[[1L]] != '/') stop('multi-level clustering must be specified as `| c1/c2`')
if (!is.symbol(cls1 <- cls[[2L]]) || !is.symbol(cls2 <- cls[[3L]])) stop('2-level clustering must be all-symbol, e.g. `| c1/c2`')
if (is.factor(data[[cls1]])) data[[cls1]] <- factor(data[[cls1]])
if (is.factor(data[[cls2]])) data[[cls2]] <- factor(data[[cls2]])
aggr_fom <- eval(call(name = '~', y, Reduce(
f = function(e1, e2) call('+', e1, e2),
x = as.list(cls)[-1L],
init = fomx[[2L]]
)))
ret0 <- aggregate(aggr_fom, data = data, FUN = quantile, probs = probs, ..., names = FALSE)
dimnames(ret0[[y]]) <- list(NULL, probs)
tmp1 <- split.data.frame(ret0, f = eval(call('~', cls1)))
# stopifnot(length(unique(ret0[[cls1]])) == length(tmp1))
tmp2 <- lapply(tmp1, FUN = function(tmp) {
# (tmp = tmp1[[1L]])
# lapply(all.vars(fomx), FUN = function(x) if (!all(duplicated(tmp[[x]])[-1L])) stop(sQuote(x), ' not unique per ', sQuote(cls1)))
out0 <- tmp
out0[[cls2]] <- out0[[y]] <- NULL
out <- unique.data.frame(out0)
if (nrow(out) != 1) stop('wont happen')
out[[y]] <- array(
data = apply(tmp[[y]], MARGIN = 2L, FUN = f_sum_, simplify = TRUE),
dim = c(1L, dim(tmp[[y]])[2L]),
dimnames = list(NULL, probs))
return(out)
})
ret <- do.call(what = rbind.data.frame, args = c(tmp2, list(make.row.names = FALSE)))
mult2 <- lapply(tmp1, FUN = function(tmp) {
if (nrow(tmp[[y]]) == 1L) return(invisible())
return(tmp[[y]])
})
attr(ret, which = 'mult2') <- mult2[lengths(mult2) > 0L]
}
#dimnames(ret[[y]]) <- list(NULL, probs)
return(ret)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.