globalVariables(c("indices", "group", "n", "nobs", "frac", "n.x", "overlap",
"start", "end", "x"))
#' Range plot and spinoplot for runs of missings
#'
#' `na_rle_spineplot()` is an alias of `na_rle_spinoplot()`.
#'
#' @param data A data frame that contains [`list_of_na_rle()`].
#' @param x,y A bare variable contains [`list_of_na_rle()`] mapped to x and y.
#' @param facets A facetting variable.
#' @inheritParams ggplot2::autoplot
#' @param ...
#' * `na_rle_rangeplot()`: passed to [`ggplot2::geom_segment()`] & [`ggplot2::geom_point()`].
#' * `na_rle_spinoplot()`: passed to [`ggplot2::facet_wrap()`].
#'
#' @section Spinoplot:
#' `x`: lengths of runs of missings.
#' `width`: the number of missing observations in the run.
#' `y`: Either 1 or the fraction of intersections with other variables.
#' @rdname mists-plot
#' @examples
#' if (!requireNamespace("nycflights13", quietly = TRUE)) {
#' stop("Please install the nycflights13 package to run these following examples.")
#' }
#' library(dplyr, warn.conflicts = FALSE)
#' na_runs_wind <- nycflights13::weather %>%
#' group_by(origin) %>%
#' summarise_at(vars(contains("wind")), ~ list_of_na_rle(., time_hour))
#'
#' na_runs_wind %>%
#' na_rle_rangeplot(wind_dir, origin)
#' @export
na_rle_rangeplot <- function(data, x, y = NULL, ...) {
stopifnot(is.data.frame(data))
x <- eval_tidy(enquo(x), data = data)
y <- enquo(y)
if (quo_is_null(y)) {
y <- as.factor(seq_len(vec_size(data)))
} else {
y <- eval_tidy(y, data = data)
}
tbl_rng <-
new_data_frame(list(
start = start(x),
end = end(x),
y = rep(y, times = map_int(x, length))
))
data <-
ungroup(mutate(
group_by(tbl_rng, y),
"group" := paste(y, n(), sep = "-")
))
# separate params for geom_segment and geom_point from ...
params_list <- list2(...)
if (has_length(params_list, 0)) {
segment_params <- point_params <- list2()
} else {
names_params <- names(params_list)
segment_all <- c(GeomSegment$aesthetics(), GeomSegment$parameters(TRUE))
point_all <- c(GeomPoint$aesthetics(), GeomPoint$parameters(TRUE))
segment_params <- params_list[which(names_params %in% segment_all)]
point_params <- params_list[which(names_params %in% point_all)]
}
plot <- ggplot()
segment_params$data <- data
segment_params$mapping <-
aes(x = start, xend = end, y = y, yend = y, group = group)
segment_params$inherit.aes <- FALSE
plot <- plot + do.call(geom_segment, segment_params)
point_params$data <- data
point_params$inherit.aes <- FALSE
point_params$mapping <- aes(x = start, y = y)
plot <- plot + do.call(geom_point, point_params)
point_params$mapping <- aes(x = end, y = y)
plot <- plot + do.call(geom_point, point_params)
plot + labs(x = vec_ptype_full(data$start), y = "")
}
#' @rdname mists-plot
#' @examples
#' na_runs_wind %>%
#' na_rle_spinoplot(x = wind_dir, facets = origin)
#' na_runs_wind %>%
#' na_rle_spinoplot(x = wind_dir, y = wind_gust, facets = origin)
#' @export
na_rle_spinoplot <- function(data, x, y = NULL, facets = NULL, ...) {
# credits to @coolbutuseless
# the custom breaks could not be possible without this post
# https://coolbutuseless.github.io/2019/03/07/custom-axis-breaks-on-facetted-ggplot/
stopifnot(is.data.frame(data))
x <- enquo(x)
y <- enquo(y)
facets <- enquo(facets)
lst_na_rle_x <- eval_tidy(x, data = data)
if (quo_is_null(facets)) {
facets_vals <- as.factor(vec_seq_along(lst_na_rle_x))
} else {
facets_vals <- as.factor(eval_tidy(facets, data = data))
}
xlab <- paste("runs [frequency]", parenthesis(as_label(x)))
na_runs_lst_x <-
map2(
lst_na_rle_x, facets_vals,
function(.x, .y) mutate(na_rle_table(.x), "facets" := .y)
)
na_runs_x <- vec_rbind(!!! na_runs_lst_x)
na_runs_x <-
mutate(
group_by(na_runs_x, facets),
"xlabs" := paste(lengths, brackets(n), sep = "\n"),
"x" := .5 * c(cumsum(nobs) + cumsum(dplyr::lag(nobs, default = 0)))
)
count_label <- counter()
count_scale <- counter()
label_x <- function(x) {
which_facet <- levels(na_runs_x$facets)[count_label()]
filter(na_runs_x, facets == which_facet)[["xlabs"]]
}
scale_x <- function(x) {
which_facet <- levels(na_runs_x$facets)[count_scale()]
filter(na_runs_x, facets == which_facet)[["x"]]
}
if (quo_is_null(y)) {
ggplot(na_runs_x, aes(x = x, y = 1, width = nobs)) +
geom_bar(stat = "identity", colour = "white") +
scale_x_continuous(
labels = label_x,
breaks = scale_x,
minor_breaks = NULL
) +
facet_wrap(~ facets, scales = 'free_x', ...) +
labs(x = xlab, y = "")
} else {
lst_na_rle_y <- eval_tidy(y, data = data)
ylab <- paste("proportion of overlaps", parenthesis(as_label(y)))
x_full <- na_rle_expand2(lst_na_rle_x, facets = facets_vals)
y_full <- na_rle_expand2(lst_na_rle_y, facets = facets_vals)
intersect_xy <- semi_join(x_full, y_full, by = c("indices", "facets"))
overlaps_xy <- count(intersect_xy, lengths, facets)
inner_xy <- inner_join(overlaps_xy, na_runs_x, by = c("lengths", "facets"))
lgl <- factor(c("TRUE", "FALSE"), levels = c("TRUE", "FALSE"))
frac_intersect <-
transmute(
group_by(inner_xy, lengths, facets),
"frac" := n.x / nobs, "overlap" := lgl[1]
)
frac_diff <-
mutate(
group_by(frac_intersect, facets),
"frac" := 1 - frac, "overlap" := lgl[2]
)
frac_xy <- vec_rbind(frac_intersect, frac_diff)
ljoin_xy <- left_join(na_runs_x, frac_xy, by = c("lengths", "facets"))
na_runs_xy <-
mutate(
group_by(ljoin_xy, facets),
"overlap" := if_else(is.na(frac), lgl[1], overlap),
"frac" := if_else(is.na(frac), 1, frac)
)
ggplot(na_runs_xy, aes(x = x, y = frac, width = nobs, fill = overlap)) +
geom_bar(stat = "identity", colour = "white") +
scale_x_continuous(
labels = label_x,
breaks = scale_x,
minor_breaks = NULL
) +
facet_wrap(~ facets, scales = 'free_x', ...) +
labs(x = xlab, y = ylab)
}
}
#' @rdname mists-plot
#' @export
#' @usage NULL
na_rle_spineplot <- na_rle_spinoplot
#' @rdname mists-plot
#' @method autoplot rle_na
#' @export
autoplot.rle_na <- function(object, y = as.factor(1L), ...) {
autoplot.list_of_rle_na(as_list_of(object), y = y, ...)
}
#' @method autoplot list_of_rle_na
#' @export
autoplot.list_of_rle_na <- function(object, y = seq_along(object), ...) {
data <- new_data_frame(list(x = object, y = y))
na_rle_rangeplot(data, x = x, y = y)
}
#' Add a layer of `list_of_na_rle()` to an existing ggplot
#'
#' @param var A variable that contains `list_of_na_rle()`.
#' @param data A data frame.
#' @param ... Passed to [`ggplot2::geom_rect()`].
#' @examples
#' if (!requireNamespace("nycflights13", quietly = TRUE)) {
#' stop("Please install the nycflights13 package to run these following examples.")
#' }
#' library(dplyr, warn.conflicts = FALSE)
#' weather <- nycflights13::weather
#' na_runs_wind <- weather %>%
#' group_by(origin) %>%
#' summarise_at(
#' vars(contains("wind")),
#' ~ list_of_na_rle(., index_by = time_hour)
#' )
#'
#' library(ggplot2)
#' ggplot(weather, aes(x = time_hour, y = wind_gust)) +
#' geom_line(na.rm = TRUE) +
#' layer_na_rle(wind_gust, data = na_runs_wind, fill = "red", alpha = .5) +
#' facet_grid(origin ~ .)
#' @export
layer_na_rle <- function(var, data, ...) {
var_eval <- eval_tidy(enquo(var), data)
if (!is_list_of_rle_na(var_eval)) {
abort("`var` must be an object of `list_of_na_rle()`.")
}
cols_non_na_rle <- names(data[!map_lgl(data, is_list_of_rle_na)])
data <- data[cols_non_na_rle]
common_tunit <- common_tunit(var_eval)
na_rle_df <-
new_data_frame(list(
start = start(var_eval) - common_tunit,
end = end(var_eval) + common_tunit
))
base_idx <- vec_seq_along(data)
rep_times <- map_int(var_eval, length)
df_idx <- rep.int(base_idx, times = rep_times)
data <- data[df_idx, ]
data <- vec_cbind(data, na_rle_df)
geom_rect(
aes(xmin = start, xmax = end), ymin = -Inf, ymax = Inf,
data = data, ..., inherit.aes = FALSE
)
}
na_rle_expand2 <- function(x, ...) {
tbl <- add_column_id(x, ...)
new_col <- names(tbl)
y <- tbl[[new_col]]
res_lst <-
map2(x, y, function(.x, .y) mutate(na_rle_expand(.x), !! new_col := .y))
res <- bind_rows(!!! res_lst) # vec_rbind() should work here
indices_restore(res, x[[1L]])
}
add_column_id <- function(x, ...) {
qs <- enquos(..., .named = TRUE)
if (is_empty(qs)) {
y <- vec_seq_along(x)
new_col <- "id"
} else {
y <- eval_tidy(qs[[1]])
new_col <- names(qs)
}
list2(!! new_col := y)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.