stat_bloc <- function(
mapping = NULL,
data = NULL,
geom = "bloc",
position = "identity",
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE,
side = side,
...){
# custom params
# offset = 0.01) {
#browser()
ggplot2::layer(
data = data,
mapping = mapping,
stat = StatBloc,
geom = geom,
position = position,
show.legend = show.legend,
check.aes = FALSE,
inherit.aes = FALSE,
side,# only FALSE to turn the warning off
params = list(
na.rm = na.rm,
# divider = divider,
prob.struct,
offset = offset,
extra_var,
...
)
)
}
#' @importFrom utils tail
#' @import dplyr
StatBloc <- ggplot2::ggproto(
"StatBloc", ggplot2::Stat,
non_missing_aes = "weight",
# this wouldn't work would it
# default_aes = aes(y = stat(density)),
compute_panel = function(self, data, scales, na.rm = FALSE,
prob.struct, offset = 0.01, extra_var,
bw = "nrd0",
adjust = 1,
kernel = "gaussian",
n = 512,side){
margin <- getFromNamespace("margin", "productplots") # TODO: get rid of productplot dependency
# stuff from prodcalc()
# this is wrong... need unique()
#browser()
marg_var <- sapply(get_margs(prob.struct), as.character)
if (!(is.list(marg_var) & length(marg_var) == 0)) {
marg_var <- paste0("p.", marg_var)
} else {
marg_var <- c()
}
cond_var <- sapply(get_conds(prob.struct), as.character)
#browser()
#if(extra_var != "NULL") cond_var = c(cond_var, fill_var)
if (!(is.list(cond_var) & length(cond_var) == 0)){
cond_var <- paste0("p.", cond_var)
} else {
cond_var <- c()
}
# check if these variables are continuous
all_rvs <- unique(c(marg_var, cond_var))
is_continuous <- function(i) {
!rlang::is_integerish(i)
}
rv_levels <- sapply(as.tibble(data[, all_rvs]), is_continuous)
has_too_many_levels <- sum(rv_levels)
if (!has_too_many_levels){
message("Defaulting to mosaic plots. Use `stat=blocdensity` to force density plots")
#here data$group is different
#browser()
# the normal mosaic plot things
wt <- margin(data, marg_var, cond_var)
# base_layer <- function(data, prob.struct, offset, level=1, bounds = productplots:::bound()){
#browser()
res <- bloc_divide(data = wt, prob.struct = prob.struct, offset = offset)
for(i in names(extra_var)){
if(!is.na(extra_var[i])){
res[,i] = res[,paste0("p.", extra_var[i])]
}
}
#
# pieces <- as.list(dlply(res, 1))
# res <- ldply(seq_along(pieces), function(i){
# piece <- pieces[[i]]
#
# l <- piece$l
# r <- piece$r
# b <- piece$b
# t <- piece$t
#
# # coords <- data.frame(x = c(l,l,r,r), y=c(b,t,b,t))
# coords <- new_data_frame(list(
# y = c(t, t, b, b, t),
# x = c(l, r, r, l, l)
# ))
#
# others <- piece %>%
# subset(select = -c(l, r, b, t))
#
# others$group <- i
# cbind(coords, others)
# })
res <- dplyr::rename(res, xmin=l, xmax=r, ymin=b, ymax=t)
#browser()
res$group <- seq_len(nrow(res))
res
} else {
# ==================== density plots ======================
message("Defaulting to density plots. Use `stat=mosaic` to force mosaic plots")
is_P_B_given_A <- FALSE
is_joint <- FALSE
is_ridges <- FALSE # for P(A|B) defaults to P(A|B), color <- B
# Need to ignore the x.conds rows
aeses <- unlist(prob.struct$aes, use.names = FALSE)
# the real number of terms in pmf, like P(B|A) P(A) has two terms
# but pmf = P(B | A) only has one
# n_prob_terms <- sum(!grepl("cond", aeses))
n_prob_terms <- length(marg_var)
if (n_prob_terms > 2){
stop("one simply does not draw density plots with such complexity")
}
if (n_prob_terms == 1){
# there should only be 1 var in the marginals
stopifnot(length(marg_var) == 1)
if (is_continuous(data[, marg_var])){
# is it P(A|...?)
# all the conds should be discrete
if (!is.null(cond_var)){
none_cond_continuous <-
sum(sapply(as.tibble(data[, cond_var]), is_continuous)) == 0
stopifnot(none_cond_continuous)
}
# the last aes (for P(A|...)) must be one of these
stopifnot(grepl("x.height", tail(aeses, n = 1)) |
grepl("y.width", tail(aeses,n=1)) )
# base_layout does not consider color, etc aeses
# sub_prob.struct
# for (i in seq_len(nrow(prob.struct))){
# cur_aes <- prob.struct[i,3][[1]]
# if (grepl("colour", cur_aes)){
# prob.struct <- prob.struct[-i, ]
# break
# }
# }
# ===== P(A|...) =======
cont_var <- marg_var
group_var <- cond_var
# partition
# if (is.null(cond_var)){
if (nrow(prob.struct) == 1){
# P(A)
# wt should not be calculated here
base_layout <- data.frame(.wt = 1, l = 0, r = 1, b = 0, t = 1, level = 1)
} else {
# P(A|...)
# is it P(A|B), color <- B or y <- B
if (grepl("x",tail(aeses, n= 1))){ # A on x.height
if (grepl("y", tail(aeses, n=2)[1])){
# P(A|B), y <- B
is_ridges <- TRUE
} else if (grepl("x", tail(aeses, n=2)[1])){
stop(paste("Check aes mapping on the categorial variable ", group_var))
}
} else { # A on y.width
if (grepl("x", tail(aeses, n=2)[1])){
# P(A|B), x <- B
is_ridges <- TRUE
} else if (grepl("y", tail(aeses, n=2)[1])){
stop(paste("Check aes mapping on the categorial variable ", group_var))
}
}
# process
wt <- margin(data, marg_var, cond_var)
base_layout <- icon_divide(data = wt, prob.struct = prob.struct, offset = offset)
}
} else {
# is it P(B|A, ...)?
# A should be continuous, but
# the rest of the conds should be discrete
if (!is.null(cond_var)){
one_cond_continuous <-
sum(sapply(as.tibble(data[, cond_var]), is_continuous)) == 1
stopifnot(one_cond_continuous)
}
if (length(cond_var) == 1){
A <- cond_var
} else {
A <- cond_var[sapply(as.tibble(data[, cond_var]), is_continuous)]
stopifnot(length(A) == 1)
}
# aes on P(B|A,...) must be height or width
stopifnot(grepl("height", tail(aeses, n=1)) |
grepl("width", tail(aeses, n=1)))
# ============== P(B|A...) =============
group_var <- marg_var # TODO: wrong
cont_var <- A
# Partition
if (length(cond_var) == 1){
# P(B|A)
base_layout <- data.frame(.wt = 1, l = 0, r = 1, b = 0, t = 1, level = 1)
} else {
# P(B|A,...)
cond_mask <- !(sapply(as.tibble(data[, cond_var]), is_continuous))
wt <- margin(data, NULL, cond_var[cond_mask])
# TODO: get rid of A in the prob.struct
repeated_conds <- sapply(unlist(prob.struct$conditionals, use.names = FALSE),
function(i){paste0("p.", as.character(i))})
A_index <- match(A, unique(repeated_conds))
base_layout <- icon_divide(data = wt, prob.struct = prob.struct[-A_index,], offset = offset)
}
is_P_B_given_A <- TRUE
# stop("not implemented: P(B|A,...)")
}
} else {
# is it P(B, A|...)?
stopifnot(length(marg_var) == 2)
# B should be categorical
# A should be continuous
B <- marg_var[1]
A <- marg_var[2]
stopifnot(!is_continuous(data[, B]))
stopifnot(is_continuous(data[, A]))
# browser()
if (grepl("height", tail(aeses, n =2)[1])
&& grepl("x", tail(aeses, n =2)[1])){
if (grepl("height", tail(aeses, n =1))){
# oops
} else {
stop("P(B, A|...)? check P(B|A) orientation")
}
} else if (grepl("width", tail(aeses, n =1))
&& grepl("y", tail(aeses, n =2)[1])){
if (grepl("width", tail(aeses, n =2)[1])){
# oops
} else {
stop("P(B, A|...)? check P(B|A) orientation")
}
} else {
stop("Invalid aes mapping for P(B|A); only width and height are allowed")
}
# ======= P(B,A|...) ========
# stop()
cont_var <- A
group_var <- B
is_P_B_given_A <- TRUE
# partition
if (is.null(cond_var)){
# P(B,A)
# wt should not be calculated here
base_layout <- data.frame(.wt = 1, l = 0, r = 1, b = 0, t = 1, level = 1)
} else {
# P(B,A|...)
wt <- margin(data, marg_var, cond_var)
base_layout <- icon_divide(data = wt, prob.struct = prob.struct, offset = offset)
}
# stop("not implemented: P(B,A|...), y")
}
# ============================= density ========================
# want number of partitions on x,y axes
base_layout <- base_layout %>%
mutate(x_rank = dense_rank(l), y_rank = dense_rank(b))
# calculate a single bandwidth for all groups
if (is_string(bw)){
group_bw <- data %>%
pull(!!cont_var) %>%
bw.nrd0()
} else { # if bw is a set number
group_bw <- bw
}
# unnecessary
if (! is.null(group_var))
group_bw <- group_bw #/ length(unique(data[, group_var]))
# browser()
pieces <- as.list(dlply(data,group_var))
# if (group_var %in% names(base_layout)){
# bounds <- as.list(dlply(base_layout, group_var))
# } else {
#
#
# }
# get the range of continuous variable
continous_range <- range(select_(data, cont_var))
continuous_diff <- continous_range[2]- continous_range[1]
# estimate separate densities (grouped by aes)
densities <- ldply(seq_along(pieces), function(i) {
piece <- pieces[[i]]
# bound <- bounds[[i]]
range <- continous_range
# compute densities
# TODO: what's that $weight
#browser()
res <- compute_density(c(t(select(piece, cont_var))), NULL,
from = range[1],
to = range[2],
bw = group_bw,
adjust = adjust,
kernel = kernel,
n=n)
# keep other vars
#browser()
meta_data <- select(piece, -c("x", "y"))[1,]
meta_data$group <- i
if (! sum(is.na(res$density))){
# ====== ACHTUNG =========
# adjust positions to grids
# res$x <- res$x + (bound$x_rank - 1) * continuous_diff
# res$density <- res$density + (bound$y_rank - 1)
cbind(res, meta_data, row.names = NULL)
}
})
# if in the form P(B|A), normalize by category count
if (is_P_B_given_A){
# equivalent to stat(density * n)
# compute_density secretly does it already
densities$y <- densities$count
is_joint <- is_empty(cond_var)
# also, stack the y coordinate, similar to position = "stack"
# need to stack in the reverse order of group = 1, 2, 3, ...
if (is_joint){
densities$y <- densities %>%
mutate(rev_group = max(group) - group +1) %>%
arrange(rev_group) %>%
group_by(rev_group) %>%
mutate(gid = row_number()) %>%
group_by(gid) %>%
mutate(y = cumsum(y)) %>%
ungroup() %>%
arrange(group) %>%
.$y
} else {
densities$y <- densities %>%
mutate(rev_group = max(group) - group +1) %>%
arrange(rev_group) %>%
group_by(rev_group) %>%
mutate(gid = row_number()) %>%
group_by(gid) %>%
mutate(y = cumsum(y)) %>%
mutate(y = y / max(y)) %>% # fill logic
ungroup() %>%
arrange(group) %>%
.$y
}
} else {
densities$y <- densities$density
}
#browser()
if (is_ridges){
#browser()
# densities$y <-
densities <- densities %>%
mutate_(y = group_var) %>%
mutate(height = density)
}
#browser()
if("y.width" %in% prob.struct$aes ){
tmp = densities$x
densities$x = densities$y
densities$y = tmp
densities$flip = TRUE
} else {
densities$flip = FALSE
}
#browser()
if(side == "up"){
#return(densities)
densities$up = c(NA)
}
else if(side == "down"){
# densities$y = -densities$y
# return(densities)
densities$down = c(NA)
}
else if(side == "both"){
# temp = densities
# temp$y = -temp$y
# temp = temp %>% arrange(desc(x))
# densities = rbind(densities,temp)
# return(densities)
densities$both = c(NA)
}
return(densities)
print("error: side is either up, down, or both")
}
}
)
#' @references https://github.com/tidyverse/ggplot2/blob/660aad2db2b3495ae0d8040915a40d247133ffc0/R/stat-density.r
#'
#' Computed variables:
#' \describe{
#' \item{density}{density estimate}
#' \item{count}{density * number of points - useful for stacked density
#' plots}
#' \item{scaled}{density estimate, scaled to maximum of 1}
#' \item{ndensity}{alias for `scaled`, to mirror the syntax of
#' [`stat_bin()`]}
#' }
#' @importFrom stats density
compute_density <- function(x, w, from, to, bw = "nrd0", adjust = 1,
kernel = "gaussian", n = 512) {
nx <- length(x)
if (is.null(w)) {
w <- rep(1 / nx, nx)
}
# if less than 2 points return data frame of NAs and a warning
if (nx < 2) {
warning("Groups with fewer than two data points have been dropped.", call. = FALSE)
return(new_data_frame(list(
x = NA_real_,
density = NA_real_,
scaled = NA_real_,
ndensity = NA_real_,
count = NA_real_,
n = NA_integer_
), n = 1))
}
dens <- stats::density(x, weights = w, bw = bw, adjust = adjust,
kernel = kernel, n = n, from = from, to = to)
new_data_frame(list(
x = dens$x,
density = dens$y,
scaled = dens$y / max(dens$y, na.rm = TRUE),
ndensity = dens$y / max(dens$y, na.rm = TRUE),
count = dens$y * nx,
n = nx
), n = length(dens$x))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.