Nothing
#' Plot weighted cumulative distribution functions
#'
#' Plot cumulative distribution functions, possibly broken down according to
#' conditioning variables and taking into account sample weights.
#'
#' Weights are directly extracted from the input object \code{inp} and are
#' taken into account by adjusting the step height. To be precise, the
#' weighted step height for an observation is defined as its weight divided by
#' the sum of all weights\eqn{\ ( w_{i} / \sum_{j = 1}^{n} w_{j} ).}{.}
#'
#' @name spCdfplot
#' @aliases spCdfplot spCdfplot.default prepanelSpCdfplot panelSpCdfplot getCdf prepCdf prepCdf.data.frame prepCdf.default
#' @param inp an object of class \code{\linkS4class{simPopObj}} containing
#' survey sample and synthetic population data.
#' @param x a character vector specifying the columns of data available in the
#' sample and the population (specified in input object 'inp') to be plotted.
#' @param cond an optional character vector (of length 1, if used) specifying
#' the conditioning variable.
#' @param approx logicals indicating whether approximations of the cumulative
#' distribution functions should be computed. The default is to use
#' \code{FALSE} for the survey data and \code{TRUE} for the population data.
#' @param n integers specifying the number of points at which the
#' approximations take place (see \code{\link[stats:approxfun]{approx}}). It
#' is used wherever \code{approx} is \code{TRUE}.
#' @param bounds a logical indicating whether vertical lines should be drawn at
#' 0 and 1 (the bounds for cumulative distribution functions).
#' @param \dots further arguments to be passed to
#' \code{\link[lattice]{xyplot}}.
#' @return An object of class \code{"trellis"}, as returned by
#' \code{\link[lattice]{xyplot}}.
#' @author Andreas Alfons
#' @references
#' A. Alfons, M. Templ (2011) Simulation of close-to-reality population data for household surveys with application to EU-SILC. \emph{Statistical Methods & Applications}, \strong{20} (3), 383--407. doi: 10.1007/s10260-011-0163-2
#' @seealso \code{\link{spCdf}}, \code{\link[lattice]{xyplot}}
#' @keywords hplot
#' @export
#' @examples
#' ## these take some time and are not run automatically
#' ## copy & paste to the R command line
#'
#' set.seed(1234) # for reproducibility
#' data(eusilcS) # load sample data
#' \dontrun{
#' ## approx. 20 seconds computation time
#' inp <- specifyInput(data=eusilcS, hhid="db030", hhsize="hsize",
#' strata="db040", weight="db090")
#' simPop <- simStructure(data=inp, method="direct",
#' basicHHvars=c("age", "rb090", "hsize", "pl030", "pb220a"))
#'
#' # multinomial model with random draws
#' eusilcM <- simContinuous(simPop, additional="netIncome",
#' regModel = ~rb090+hsize+pl030+pb220a,
#' upper=200000, equidist=FALSE, nr_cpus=1)
#' class(eusilcM)
#'
#' # plot results
#' spCdfplot(eusilcM, "netIncome", cond=NULL)
#' spCdfplot(eusilcM, "netIncome", cond="rb090", layout=c(1,2))
#' }
spCdfplot <- function(inp, x, cond = NULL, approx = c(FALSE, TRUE),
n = 10000, bounds = TRUE, ...) {
## initializations
if ( !inherits(inp, "simPopObj" ) ){
stop("input argument 'inp' must be of class 'simPopObj'!\n")
}
weights.pop <- inp@pop@weight
weights.samp <- inp@sample@weight
dataS <- inp@sample@data
dataP <- inp@pop@data
if ( !is.character(x) || length(x) == 0 ) {
stop("'x' must be a character vector of positive length!\n")
}
if ( !(all(x %in% colnames(dataP)) & (all(x %in% colnames(dataS)))) ) {
stop("The variable names specified in argument 'x' must be available both in the population and the sample!\n")
}
if ( !is.null(cond) && !is.character(cond) ) {
stop("'cond' must be a character vector or NULL!\n")
if ( length(cond) != 1 ) {
stop("argument 'cond' must have length 1!\n")
}
}
if ( !(all(cond %in% colnames(dataP)) & (all(cond %in% colnames(dataS)))) ) {
stop("The variable names specified in argument 'cond' must be available both in the population and the sample!")
}
# check 'approx'
if(!is.logical(approx) || length(approx) == 0) approx <- formals()$approx
else approx <- sapply(rep(approx, length.out=2), isTRUE)
# check 'n'
if(any(approx) && (!is.numeric(n) || length(n) == 0)) {
stop("'n' is not numeric or does not have positive length")
} else n <- ifelse(approx, n[1], NA)
# check 'bounds'
bounds <- isTRUE(bounds)
# define labels for grouping variable
lab <- c("Sample", "Population")
## construct objects for 'xyplot'
# from sample
tmp <- getCdf(x, weights.samp, cond, dataS, approx=approx[1], n=n[1], name=lab[1])
values <- tmp$values
app <- t(tmp$approx)
# from population
tmp <- getCdf(x, weights.pop, cond, dataP, approx=approx[2], n=n[2], name=lab[2])
values <- rbind(values, tmp$values)
app <- rbind(app, tmp$approx)
## construct formula for 'xyplot'
form <- ".y~.x" # basic formula
if(length(x) > 1) cond <- c(".var", cond)
if(!is.null(cond)) {
cond <- paste(cond, collapse = " + ") # conditioning variabels
form <- paste(form, cond, sep=" | ") # add conditioning to formula
}
## define local version of 'xyplot'
localXyplot <- function(form, values, xlab = NULL, ylab = NULL,
auto.key = TRUE, ...,
# these arguments are defined so that they aren't supplied twice:
x, data, allow.multiple, outer, panel, prepanel, groups) {
# prepare legend
if(isTRUE(auto.key)) auto.key <- list(points=FALSE, lines=TRUE)
else if(is.list(auto.key)) {
if(is.null(auto.key$points)) auto.key$points <- FALSE
if(is.null(auto.key$lines)) auto.key$lines <- TRUE
}
command <- paste("xyplot(form, data=values, groups=.name,",
"panel=panelSpCdfplot, prepanel=prepanelSpCdfplot,",
"xlab=xlab, ylab=ylab, auto.key=auto.key, ...)")
eval(parse(text=command))
}
## call 'xyplot'
localXyplot(as.formula(form), values, approx=app, bounds=bounds, ...)
}
NULL
## panel function
#' @rdname spCdfplot
#' @export
panelSpCdfplot <- function(x, y, approx, bounds = TRUE, ...) {
if(isTRUE(bounds)) {
panel.refline(h=0, ...)
panel.refline(h=1, ...)
}
localPanelXyplot <- function(..., approx, type, distribute.type) {
i <- packet.number()
type <- ifelse(approx[,i], "l", "s")
panel.xyplot(..., type=type, distribute.type=TRUE)
}
localPanelXyplot(x, y, approx=approx, ...)
}
NULL
## prepanel function
#' @rdname spCdfplot
#' @export
prepanelSpCdfplot <- function(x, y, ...) list(ylim=c(0,1))
NULL
## internal utility functions
# get data.frame and logical indicating approximation
#' @rdname spCdfplot
#' @export
#' @keywords internal
getCdf <- function(x, weights = NULL, cond = NULL, data, ..., name = "") {
if ( is.null(cond) ) {
x <- data[, x, with=FALSE]
if ( length(weights) == 0 ) {
w <- NULL
} else {
w <- data[[weights]]
}
prepCdf(x, w, ..., name=name)
} else {
spl <- split(data, data[[cond]])
tmp <- lapply(spl, function(z) {
data <- z
x <- data[, x, with=FALSE]
if ( length(weights) == 0 ) {
w <- NULL
} else {
w <- data[[weights]]
}
res <- prepCdf(x, w, ..., name=name)
res$values <- cbind(res$values, rep(data[[cond]][1], nrow(res$values)))
colnames(res$values)[ncol(res$values)] <- cond
res
})
values <- do.call(rbind, lapply(tmp, function(x) x$values))
approx <- as.vector(sapply(tmp, function(x) x$approx))
list(values=values, approx=approx)
}
}
NULL
# prepare one or more variables
#' @rdname spCdfplot
#' @export
#' @keywords internal
prepCdf <- function(x, w, ..., name = "") UseMethod("prepCdf")
NULL
#' @rdname spCdfplot
#' @export
#' @keywords internal
prepCdf.data.frame <- function(x, w, ..., name = "") {
tmp <- lapply(x, prepCdf, w, ..., name=name)
values <- mapply(function(x, v) cbind(x$values, .var=v), tmp, names(x), SIMPLIFY=FALSE, USE.NAMES=FALSE)
values <- do.call(rbind, values)
approx <- sapply(tmp, function(x) x$approx)
list(values=values, approx=approx)
}
NULL
#' @rdname spCdfplot
#' @export
#' @keywords internal
prepCdf.default <- function(x, w, ..., name = "") {
tmp <- spCdf(x, w, ...)
values <- data.frame(.x=c(tmp$x[1], tmp$x), .y=c(0, tmp$y), .name=name)
list(values=values, approx=tmp$approx)
}
NULL
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.