#' Empirical weighted quantile
#'
#' @param x A numeric vector
#' @param weights A vector of (positive) sample weights
#' @param probs a numeric vector with the desired quantile
#' levels (default 0.5, the median)
#' @return The weighted quantile (a numeric vector)
#' @references Ferrez, J., Graf, M. (2007). Enquète suisse sur
#' la structure des salaires. Programmes R pour l'intervalle
#' de confiance de la médiane.
#' (Rapport de méthodes No. 338-0045). Neuchâtel:
#' Office fédéral de statistique.
#'
#' @examples
#' wq(x = rnorm(100), weights = runif(100))
#' @export
wq <- function(x, weights, probs = c(0.5)) {
if (missing(weights)) {
weights <- rep(1L, length(x))
}
ord <- order(x)
cum.w <- cumsum(weights[ord])[!is.na(x)] / sum(weights[!is.na(x)])
tmpS <- data.frame(matrix(rep(NA, 2 * length(probs)), nrow = 2))
tmpO <- data.frame(matrix(rep(NA, 2 * length(probs)), nrow = 2))
res <- c(rep(NA, length(probs)))
for (i in 1:length(probs)) {
tmpS[i] <- cum.w[cum.w >= probs[i]][1:2]
tmpO[i] <- ord[cum.w >= probs[i]][1:2]
res[i] <- ifelse(
test = abs(tmpS[1, i] - probs[i]) < 1e-10,
yes = mean(x[tmpO[, i]]),
no = x[tmpO[1, i]]
)
}
return(res)
}
#' Data cube creation (dcc)
#'
#' @param .data data frame to be processed
#' @param .variables variables to split data frame by, as a
#' character vector (\code{c("var1", "var2")}).
#' @param .fun function to apply to each piece
#' (default: \code{\link{jointfun_}})
#' @param ... additional functions passed to \code{.fun}.
#' @return a data cube, with a column for each
#' cateogorical variable used, and a row for each
#' combination of all the categorical variables'
#' modalities. In addition to all the modalities,
#' each variable will also have a "Total" possibility,
#' which includes all the others. The data cube will
#' contain marginal, conditional and joint empirical
#' distributions...
#' @examples
#' data("invented_wages")
#' str(invented_wages)
#' tmp <- dcc(
#' .data = invented_wages,
#' .variables = c("gender", "sector"),
#' .fun = jointfun_
#' )
#' tmp
#' str(tmp)
#' @export
dcc <- function(.data, .variables, .fun = jointfun_, ...){
dcc5(
.data,
.variables,
.fun = .fun,
.total = "Totale",
order_type = extract_unique2,
.all = TRUE, ...
)
}
#' @param order_type a function like \code{\link{extract_unique}} or
#' \code{\link{extract_unique2}}.
#'
#' @examples
#' tmp2 <- dcc2(
#' .data = invented_wages,
#' .variables = c("gender", "education"),
#' .fun = jointfun_,
#' order_type = extract_unique2
#' )
#' tmp2
#' str(tmp2)
#'
#' @rdname dcc
#' @export
dcc2 <- function(.data,
.variables,
.fun = jointfun_,
order_type = extract_unique2,
...){
dcc5(
.data,
.variables,
.fun = .fun,
.total = "Totale",
order_type = order_type,
.all = TRUE,
...
)
}
#' Keeps only joint distribution (removes '.total').
#'
#' Removes all the rows where variables have value \code{.total}.
#'
#' @param .cube a datacube with 'Totale' modalities
#' @param .total modality to eliminate (filter out)
#' (default: "Totale")
#' @param .variables a character vector with the names
#' of the categorical variables
#' @return a subset of the data cube with only the
#' combinations of all variables modalities, without the "margins".
#'
#' @examples
#' data(invented_wages)
#' str(invented_wages)
#'
#' vars <- c("gender", "education")
#' tmp <- dcc2(
#' .data = invented_wages,
#' .variables = vars,
#' .fun = jointfun_,
#' order_type = extract_unique2
#' )
#' tmp
#' str(tmp)
#' only_joint(tmp, .variables = vars)
#'
#' # Compare dimensions (number of groups)
#' dim(tmp)
#' dim(only_joint(tmp, .variables = vars))
#' @export
only_joint <- function(.cube,
.total = "Totale",
.variables = NULL) {
if (is.null(.variables)) {
.variables <- attributes(.cube)[[".variables"]]
}
tmp <- paste0(.variables, " != '", .total, "'")
dots <- paste(tmp, collapse = " & ")
filter2_(.cube, .dots = dots)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.